@@ -751,45 +751,56 @@ static void cann_copy(ggml_backend_cann_context& ctx, aclTensor* acl_src,
751
751
}
752
752
753
753
/**
 * Duplicate dst->src[0] into dst, casting when the element types differ.
 *
 * Two paths are taken depending on ggml_are_same_shape(src, dst):
 *   - same shape: ACL tensors are built straight from the two ggml tensors
 *     and handed to cann_copy (equal types) or aclnn_cast (different types);
 *   - different shape: a non-contiguous source is first copied into a
 *     pool-allocated buffer with packed strides, then that data is viewed
 *     with dst's extents before the copy/cast into dst.
 *
 * @param ctx  CANN backend context; supplies the memory pool and is passed to
 *             every ACL helper called here.
 * @param dst  Destination tensor; the source is read from dst->src[0].
 */
void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    ggml_tensor* src = dst->src[0];
    const bool   needs_cast = dst->type != src->type;

    // Same-shape path: no staging buffer, operate on the tensors as they are.
    if (ggml_are_same_shape(src, dst)) {
        aclTensor* src_acl = ggml_cann_create_tensor(src);
        aclTensor* dst_acl = ggml_cann_create_tensor(dst);
        if (needs_cast) {
            aclnn_cast(ctx, src_acl, dst_acl, ggml_cann_type_mapping(dst->type));
        } else {
            cann_copy(ctx, src_acl, dst_acl);
        }
        ggml_cann_release_resources(ctx, src_acl, dst_acl);
        return;
    }

    // Shapes differ. Start from the source data in place; a non-contiguous
    // source is copied into a pool buffer laid out with packed strides.
    const size_t         elem_size = ggml_type_size(src->type);
    void*                staged_data = src->data;
    ggml_cann_pool_alloc staging_alloc;

    if (!ggml_is_contiguous(src)) {
        aclTensor* src_acl = ggml_cann_create_tensor(src);

        staging_alloc.alloc(ctx.pool(), ggml_nelements(src) * elem_size);
        staged_data = staging_alloc.get();

        // Packed strides for the source's own extents.
        size_t packed_nb[GGML_MAX_DIMS];
        size_t stride = elem_size;
        for (int d = 0; d < GGML_MAX_DIMS; d++) {
            packed_nb[d] = stride;
            stride *= src->ne[d];
        }

        aclTensor* staged_acl = ggml_cann_create_tensor(
            staged_data, ggml_cann_type_mapping(src->type), elem_size,
            src->ne, packed_nb, GGML_MAX_DIMS);
        cann_copy(ctx, src_acl, staged_acl);
        ggml_cann_release_resources(ctx, src_acl, staged_acl);
    }

    // View the (now packed) source data with dst's extents: the strides are
    // derived from dst->ne while the element type stays that of the source.
    size_t reshaped_nb[GGML_MAX_DIMS];
    size_t stride = elem_size;
    for (int d = 0; d < GGML_MAX_DIMS; d++) {
        reshaped_nb[d] = stride;
        stride *= dst->ne[d];
    }

    aclTensor* reshaped_acl = ggml_cann_create_tensor(
        staged_data, ggml_cann_type_mapping(src->type), elem_size,
        dst->ne, reshaped_nb, GGML_MAX_DIMS, ACL_FORMAT_ND);
    aclTensor* dst_acl = ggml_cann_create_tensor(dst);

    if (needs_cast) {
        aclnn_cast(ctx, reshaped_acl, dst_acl, ggml_cann_type_mapping(dst->type));
    } else {
        cann_copy(ctx, reshaped_acl, dst_acl);
    }
    ggml_cann_release_resources(ctx, reshaped_acl, dst_acl);
}
795
806
0 commit comments