@@ -1235,16 +1235,16 @@ struct clip_image_f32 * clip_image_f32_init() {
12351235
// Releases a single 8-bit image object with `delete`.
// img: image to destroy; a null pointer is accepted and nothing happens.
void clip_image_u8_free(struct clip_image_u8 * img) {
    if (img == nullptr) {
        return; // deleting a null pointer is a no-op anyway; made explicit here
    }
    delete img;
}
// Releases a single float image object with `delete`.
// img: image to destroy; a null pointer is accepted and nothing happens.
void clip_image_f32_free(struct clip_image_f32 * img) {
    if (img == nullptr) {
        return; // deleting a null pointer is a no-op anyway; made explicit here
    }
    delete img;
}
1238- void clip_image_u8_batch_free (struct clip_image_u8_batch & batch) {
1239- if (batch. size > 0 ) {
1240- delete[] batch. data ;
1241- batch. size = 0 ;
1238+ void clip_image_u8_batch_free (struct clip_image_u8_batch * batch) {
1239+ if (batch-> size > 0 ) {
1240+ delete[] batch-> data ;
1241+ batch-> size = 0 ;
12421242 }
12431243}
1244- void clip_image_f32_batch_free (struct clip_image_f32_batch & batch) {
1245- if (batch. size > 0 ) {
1246- delete[] batch. data ;
1247- batch. size = 0 ;
1244+ void clip_image_f32_batch_free (struct clip_image_f32_batch * batch) {
1245+ if (batch-> size > 0 ) {
1246+ delete[] batch-> data ;
1247+ batch-> size = 0 ;
12481248 }
12491249}
12501250
@@ -1497,7 +1497,7 @@ static std::vector<clip_image_u8*> divide_to_patches_u8(const clip_image_u8 & im
14971497
14981498// returns the normalized float tensor for llava-1.5; for llava-1.6 (spatial_unpad with anyres processing) it returns the normalized image patch tensors as a vector
14991499// res_imgs memory is being allocated here, previous allocations will be freed if found
1500- bool clip_image_preprocess (struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch & res_imgs) {
1500+ bool clip_image_preprocess (struct clip_ctx * ctx, const clip_image_u8 * img, clip_image_f32_batch * res_imgs) {
15011501 bool pad_to_square = true ;
15021502 if (!ctx->has_vision_encoder ) {
15031503 printf (" This gguf file seems to have no vision encoder\n " );
@@ -1509,11 +1509,11 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
15091509 pad_to_square = false ;
15101510 }
15111511 // free the previous res_imgs if any set
1512- if (res_imgs. size > 0 ) {
1512+ if (res_imgs-> size > 0 ) {
15131513 clip_image_f32_batch_free (res_imgs);
15141514 }
1515- res_imgs. data = nullptr ;
1516- res_imgs. size = 0 ;
1515+ res_imgs-> data = nullptr ;
1516+ res_imgs-> size = 0 ;
15171517
15181518 // the logic below is to pad the shorter side to the longer side with a background color: rgb(122, 116, 104)
15191519 // see https://github.com/haotian-liu/LLaVA/blob/e854a2bf85118c504f6f16bf5c3c7c92f8fa8c6b/llava/conversation.py#L113-L156
@@ -1568,11 +1568,11 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
15681568 bicubic_resize (*img, *image_original_resize, params.image_size , params.image_size ); // in python this is "shortest_edge", but all CLIP are square
15691569 patches.insert (patches.begin (), image_original_resize);
15701570 // clip_image_f32_batch_init(patches.size());
1571- res_imgs. size = patches.size ();
1572- res_imgs. data = new clip_image_f32[res_imgs. size ];
1571+ res_imgs-> size = patches.size ();
1572+ res_imgs-> data = new clip_image_f32[res_imgs-> size ];
15731573 int num=0 ;
15741574 for (auto & patch : patches) {
1575- normalize_image_u8_to_f32 (patch, &res_imgs. data [num], ctx->image_mean , ctx->image_std );
1575+ normalize_image_u8_to_f32 (patch, &res_imgs-> data [num], ctx->image_mean , ctx->image_std );
15761576 num++;
15771577 }
15781578
@@ -1660,9 +1660,9 @@ bool clip_image_preprocess(struct clip_ctx * ctx, const clip_image_u8 * img, cli
16601660 // }
16611661 // res_imgs.push_back(res);
16621662
1663- res_imgs. size = 1 ;
1664- res_imgs. data = new clip_image_f32[res_imgs. size ];
1665- res_imgs. data [0 ] = *res;
1663+ res_imgs-> size = 1 ;
1664+ res_imgs-> data = new clip_image_f32[res_imgs-> size ];
1665+ res_imgs-> data [0 ] = *res;
16661666 clip_image_f32_free (res);
16671667
16681668 return true ;
0 commit comments