@@ -157,12 +157,6 @@ typedef void * thread_ret_t;
 //#define GGML_SOFT_MAX_ACCELERATE
 #endif
 
-#if UINTPTR_MAX == 0xFFFFFFFF
-    #define GGML_MEM_ALIGN 4
-#else
-    #define GGML_MEM_ALIGN 16
-#endif
-
 //
 // logging
 //
@@ -7098,11 +7092,13 @@ struct ggml_tensor * ggml_conv_transpose_2d_p0(
     };
 
     struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
+
+    ggml_set_op_params_i32(result, 0, stride);
+
     result->op = GGML_OP_CONV_TRANSPOSE_2D;
     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
     result->src[0] = a;
     result->src[1] = b;
-    result->src[2] = ggml_new_i32(ctx, stride);
 
     return result;
 }
@@ -13498,7 +13494,6 @@ static void ggml_compute_forward_conv_transpose_2d(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        const struct ggml_tensor * opt0,
               struct ggml_tensor * dst) {
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
@@ -13558,7 +13553,7 @@ static void ggml_compute_forward_conv_transpose_2d(
         return;
     }
 
-    const int32_t stride = ((const int32_t*)(opt0->data))[0];
+    const int32_t stride = ggml_get_op_params_i32(dst, 0);
 
     // total patches in dst
    const int np = ne2;
@@ -13571,7 +13566,7 @@ static void ggml_compute_forward_conv_transpose_2d(
     const int ip1 = MIN(ip0 + dp, np);
 
     ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
-    ggml_fp16_t * const wdata_src = (ggml_fp16_t *) params->wdata + nk;
+    ggml_fp16_t * const wdata_src = wdata + nk;
 
     for (int i2 = ip0; i2 < ip1; i2++) { // Cout
         float * dst_data = (float *)((char *) dst->data + i2*nb2);
@@ -13583,9 +13578,8 @@ static void ggml_compute_forward_conv_transpose_2d(
                     for (int i00 = 0; i00 < ne00; i00++) {
                         float v = 0;
                         ggml_vec_dot_f16(ne03, &v,
-                                (ggml_fp16_t *) wdata_src + i1n,
-                                (ggml_fp16_t *) wdata_kernel + i01*ne00*ne03 + i00*ne03);
-
+                                wdata_src + i1n,
+                                wdata_kernel + i01*ne00*ne03 + i00*ne03);
                         dst_data[(i11*stride + i01)*ne0 + i10*stride + i00] += v;
                     }
                 }
@@ -15732,7 +15726,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
             } break;
         case GGML_OP_CONV_TRANSPOSE_2D:
             {
-                ggml_compute_forward_conv_transpose_2d(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor);
+                ggml_compute_forward_conv_transpose_2d(params, tensor->src[0], tensor->src[1], tensor);
             } break;
         case GGML_OP_POOL_1D:
             {
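
Note on the change: the transposed-convolution stride used to travel as an extra source tensor (src[2], allocated with ggml_new_i32), and the compute path read it back through opt0->data. With this diff it is stored in the result tensor's op_params instead, so no extra tensor has to live in the context and the compute function only needs src0, src1 and dst. Below is a minimal, self-contained sketch of that round-trip pattern; it deliberately does not call ggml itself, and the buffer size and helper names are assumptions modeled on the static ggml_set_op_params_i32 / ggml_get_op_params_i32 helpers that the diff relies on.

// Sketch only: an int32 operator parameter stored inline in the tensor,
// written at graph-build time and read back at compute time.
#include <assert.h>
#include <stdint.h>

#define OP_PARAMS_I32_COUNT 8   // assumed capacity; ggml sizes this via its own op_params limit

struct toy_tensor {
    int32_t op_params[OP_PARAMS_I32_COUNT]; // per-op scalar parameters live with the tensor
};

static void toy_set_op_params_i32(struct toy_tensor * t, uint32_t i, int32_t value) {
    assert(i < OP_PARAMS_I32_COUNT);
    t->op_params[i] = value;
}

static int32_t toy_get_op_params_i32(const struct toy_tensor * t, uint32_t i) {
    assert(i < OP_PARAMS_I32_COUNT);
    return t->op_params[i];
}

int main(void) {
    struct toy_tensor result = {0};
    toy_set_op_params_i32(&result, 0, 2);                      // build time: record stride = 2
    const int32_t stride = toy_get_op_params_i32(&result, 0);  // compute time: read it back
    assert(stride == 2);
    return 0;
}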