From a765f3bd65b563a1ce03e21320a64de622c4a590 Mon Sep 17 00:00:00 2001 From: Dennis Keck <26092524+fellhorn@users.noreply.github.com> Date: Sun, 31 Aug 2025 23:34:04 +0200 Subject: [PATCH 1/5] Address code review comments Signed-off-by: Dennis Keck <26092524+fellhorn@users.noreply.github.com> --- llama-cpp-2/src/mtmd.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/llama-cpp-2/src/mtmd.rs b/llama-cpp-2/src/mtmd.rs index e1f712ad..d03dcfeb 100644 --- a/llama-cpp-2/src/mtmd.rs +++ b/llama-cpp-2/src/mtmd.rs @@ -43,7 +43,7 @@ impl From for MtmdInputChunkType { llama_cpp_sys_2::MTMD_INPUT_CHUNK_TYPE_TEXT => MtmdInputChunkType::Text, llama_cpp_sys_2::MTMD_INPUT_CHUNK_TYPE_IMAGE => MtmdInputChunkType::Image, llama_cpp_sys_2::MTMD_INPUT_CHUNK_TYPE_AUDIO => MtmdInputChunkType::Audio, - _ => panic!("Unknown MTMD input chunk type"), + _ => panic!("Unknown MTMD input chunk type: {}", chunk_type), } } } @@ -211,10 +211,11 @@ impl MtmdContext { } /// Get audio bitrate in Hz (e.g., 16000 for Whisper). - /// Returns -1 if audio is not supported. + /// Returns None if audio is not supported. #[must_use] - pub fn get_audio_bitrate(&self) -> i32 { - unsafe { llama_cpp_sys_2::mtmd_get_audio_bitrate(self.context.as_ptr()) } + pub fn get_audio_bitrate(&self) -> Option { + let rate = unsafe { llama_cpp_sys_2::mtmd_get_audio_bitrate(self.context.as_ptr()) }; + (rate > 0).then(|| rate as u32) } /// Tokenize input text and bitmaps into chunks. From ae21a1cc063019f8a2cb30626f185f128dd40570 Mon Sep 17 00:00:00 2001 From: Dennis Keck <26092524+fellhorn@users.noreply.github.com> Date: Tue, 2 Sep 2025 00:10:48 +0200 Subject: [PATCH 2/5] Clippy & fix batch size Signed-off-by: Dennis Keck <26092524+fellhorn@users.noreply.github.com> --- examples/mtmd/src/mtmd.rs | 12 ++++++++---- llama-cpp-2/src/mtmd.rs | 33 ++++++++++++++++----------------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/examples/mtmd/src/mtmd.rs b/examples/mtmd/src/mtmd.rs index 6a704d3c..7e0c3179 100644 --- a/examples/mtmd/src/mtmd.rs +++ b/examples/mtmd/src/mtmd.rs @@ -50,6 +50,9 @@ pub struct MtmdCliParams { /// Number of threads #[arg(short = 't', long = "threads", value_name = "N", default_value = "4")] pub n_threads: i32, + /// Number of tokens to process in a batch during eval chunks + #[arg(long = "batch-size", value_name = "b", default_value = "64")] + pub batch_size: i32, /// Maximum number of tokens in context #[arg(long = "n-tokens", value_name = "N", default_value = "4096")] pub n_tokens: NonZeroU32, @@ -140,6 +143,7 @@ impl MtmdCliContext { context: &mut LlamaContext, msg: LlamaChatMessage, add_bos: bool, + batch_size: i32, ) -> Result<(), Box> { self.chat.push(msg); @@ -168,7 +172,7 @@ impl MtmdCliContext { // Clear bitmaps after tokenization self.bitmaps.clear(); - self.n_past = chunks.eval_chunks(&self.mtmd_ctx, context, 0, 0, 1, true)?; + self.n_past = chunks.eval_chunks(&self.mtmd_ctx, context, 0, 0, batch_size, true)?; Ok(()) } @@ -186,7 +190,7 @@ impl MtmdCliContext { for _i in 0..max_predict { // Sample next token - let token = sampler.sample(context, 0); + let token = sampler.sample(context, -1); generated_tokens.push(token); sampler.accept(token); @@ -244,7 +248,7 @@ fn run_single_turn( println!("Evaluating message: {msg:?}"); // Evaluate the message (prefill) - ctx.eval_message(model, context, msg, true)?; + ctx.eval_message(model, context, msg, true, params.batch_size)?; // Generate response (decode) ctx.generate_response(model, context, sampler, params.n_predict)?; @@ -286,7 +290,7 @@ fn main() -> Result<(), Box> { // Create context let context_params = LlamaContextParams::default() .with_n_threads(params.n_threads) - .with_n_batch(1) + .with_n_batch(params.batch_size.try_into()?) .with_n_ctx(Some(params.n_tokens)); let mut context = model.new_context(&backend, context_params)?; diff --git a/llama-cpp-2/src/mtmd.rs b/llama-cpp-2/src/mtmd.rs index d03dcfeb..226937a5 100644 --- a/llama-cpp-2/src/mtmd.rs +++ b/llama-cpp-2/src/mtmd.rs @@ -30,10 +30,13 @@ use crate::token::LlamaToken; #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum MtmdInputChunkType { /// Text input chunk + #[allow(clippy::cast_possible_wrap)] Text = llama_cpp_sys_2::MTMD_INPUT_CHUNK_TYPE_TEXT as isize, /// Image input chunk + #[allow(clippy::cast_possible_wrap)] Image = llama_cpp_sys_2::MTMD_INPUT_CHUNK_TYPE_IMAGE as isize, /// Audio input chunk + #[allow(clippy::cast_possible_wrap)] Audio = llama_cpp_sys_2::MTMD_INPUT_CHUNK_TYPE_AUDIO as isize, } @@ -43,7 +46,7 @@ impl From for MtmdInputChunkType { llama_cpp_sys_2::MTMD_INPUT_CHUNK_TYPE_TEXT => MtmdInputChunkType::Text, llama_cpp_sys_2::MTMD_INPUT_CHUNK_TYPE_IMAGE => MtmdInputChunkType::Image, llama_cpp_sys_2::MTMD_INPUT_CHUNK_TYPE_AUDIO => MtmdInputChunkType::Audio, - _ => panic!("Unknown MTMD input chunk type: {}", chunk_type), + _ => panic!("Unknown MTMD input chunk type: {chunk_type}"), } } } @@ -106,9 +109,7 @@ impl From for MtmdContextParams { use_gpu: params.use_gpu, print_timings: params.print_timings, n_threads: params.n_threads, - media_marker: unsafe { CStr::from_ptr(params.media_marker) } - .to_owned() - .into(), + media_marker: unsafe { CStr::from_ptr(params.media_marker) }.to_owned(), } } } @@ -215,7 +216,7 @@ impl MtmdContext { #[must_use] pub fn get_audio_bitrate(&self) -> Option { let rate = unsafe { llama_cpp_sys_2::mtmd_get_audio_bitrate(self.context.as_ptr()) }; - (rate > 0).then(|| rate as u32) + (rate > 0).then_some(rate.unsigned_abs()) } /// Tokenize input text and bitmaps into chunks. @@ -276,7 +277,7 @@ impl MtmdContext { llama_cpp_sys_2::mtmd_tokenize( self.context.as_ptr(), chunks.chunks.as_ptr(), - &input_text, + &raw const input_text, bitmap_ptrs.as_ptr().cast_mut(), bitmaps.len(), ) @@ -627,15 +628,10 @@ impl MtmdInputChunks { let chunk_ptr = unsafe { llama_cpp_sys_2::mtmd_input_chunks_get(self.chunks.as_ptr(), index) }; - if chunk_ptr.is_null() { - None - } else { - // Note: We don't own this chunk, it's owned by the chunks collection - Some(MtmdInputChunk { - chunk: NonNull::new(chunk_ptr.cast_mut()).unwrap(), - owned: false, - }) - } + NonNull::new(chunk_ptr.cast_mut()).map(|ptr| MtmdInputChunk { + chunk: ptr, + owned: false, + }) } /// Get total number of tokens across all chunks. @@ -702,7 +698,7 @@ impl MtmdInputChunks { seq_id, n_batch, logits_last, - &mut new_n_past, + &raw mut new_n_past, ) }; @@ -754,7 +750,10 @@ impl MtmdInputChunk { let mut n_tokens = 0usize; let tokens_ptr = unsafe { - llama_cpp_sys_2::mtmd_input_chunk_get_tokens_text(self.chunk.as_ptr(), &mut n_tokens) + llama_cpp_sys_2::mtmd_input_chunk_get_tokens_text( + self.chunk.as_ptr(), + &raw mut n_tokens, + ) }; if tokens_ptr.is_null() || n_tokens == 0 { From 40f398e2b19e046d220ca5fdaaf0fc630ae87f51 Mon Sep 17 00:00:00 2001 From: Dennis Keck <26092524+fellhorn@users.noreply.github.com> Date: Tue, 2 Sep 2025 00:23:12 +0200 Subject: [PATCH 3/5] MtmdInputChunkType u32 repr Signed-off-by: Dennis Keck <26092524+fellhorn@users.noreply.github.com> --- llama-cpp-2/src/mtmd.rs | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/llama-cpp-2/src/mtmd.rs b/llama-cpp-2/src/mtmd.rs index 226937a5..28055619 100644 --- a/llama-cpp-2/src/mtmd.rs +++ b/llama-cpp-2/src/mtmd.rs @@ -25,19 +25,18 @@ use crate::token::LlamaToken; /// let audio_chunk = MtmdInputChunkType::Audio; /// /// assert_eq!(text_chunk, MtmdInputChunkType::Text); +/// assert_eq!(text_chunk as u32, llama_cpp_sys_2::MTMD_INPUT_CHUNK_TYPE_TEXT); /// assert_ne!(text_chunk, image_chunk); /// ``` #[derive(Debug, Clone, Copy, PartialEq, Eq)] +#[repr(u32)] pub enum MtmdInputChunkType { /// Text input chunk - #[allow(clippy::cast_possible_wrap)] - Text = llama_cpp_sys_2::MTMD_INPUT_CHUNK_TYPE_TEXT as isize, + Text = llama_cpp_sys_2::MTMD_INPUT_CHUNK_TYPE_TEXT, /// Image input chunk - #[allow(clippy::cast_possible_wrap)] - Image = llama_cpp_sys_2::MTMD_INPUT_CHUNK_TYPE_IMAGE as isize, + Image = llama_cpp_sys_2::MTMD_INPUT_CHUNK_TYPE_IMAGE, /// Audio input chunk - #[allow(clippy::cast_possible_wrap)] - Audio = llama_cpp_sys_2::MTMD_INPUT_CHUNK_TYPE_AUDIO as isize, + Audio = llama_cpp_sys_2::MTMD_INPUT_CHUNK_TYPE_AUDIO, } impl From for MtmdInputChunkType { From 3ddf41abcb1697cf96a02970ce3ce43ef68c000f Mon Sep 17 00:00:00 2001 From: Dennis Keck <26092524+fellhorn@users.noreply.github.com> Date: Tue, 2 Sep 2025 09:51:50 +0200 Subject: [PATCH 4/5] Add comment re ownership Signed-off-by: Dennis Keck <26092524+fellhorn@users.noreply.github.com> --- llama-cpp-2/src/mtmd.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/llama-cpp-2/src/mtmd.rs b/llama-cpp-2/src/mtmd.rs index 28055619..6ff50054 100644 --- a/llama-cpp-2/src/mtmd.rs +++ b/llama-cpp-2/src/mtmd.rs @@ -627,6 +627,7 @@ impl MtmdInputChunks { let chunk_ptr = unsafe { llama_cpp_sys_2::mtmd_input_chunks_get(self.chunks.as_ptr(), index) }; + // Note: We don't own this chunk, it's owned by the chunks collection NonNull::new(chunk_ptr.cast_mut()).map(|ptr| MtmdInputChunk { chunk: ptr, owned: false, From 94a83e9a31ea8dc5f920aa29afe0c1d75cd2fa0c Mon Sep 17 00:00:00 2001 From: Dennis Keck <26092524+fellhorn@users.noreply.github.com> Date: Thu, 4 Sep 2025 00:20:51 +0200 Subject: [PATCH 5/5] Fix MtmdInputChunkType win build & default bs 1 Signed-off-by: Dennis Keck <26092524+fellhorn@users.noreply.github.com> --- examples/mtmd/src/mtmd.rs | 2 +- llama-cpp-2/src/mtmd.rs | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/mtmd/src/mtmd.rs b/examples/mtmd/src/mtmd.rs index 7e0c3179..e0f52de9 100644 --- a/examples/mtmd/src/mtmd.rs +++ b/examples/mtmd/src/mtmd.rs @@ -51,7 +51,7 @@ pub struct MtmdCliParams { #[arg(short = 't', long = "threads", value_name = "N", default_value = "4")] pub n_threads: i32, /// Number of tokens to process in a batch during eval chunks - #[arg(long = "batch-size", value_name = "b", default_value = "64")] + #[arg(long = "batch-size", value_name = "b", default_value = "1")] pub batch_size: i32, /// Maximum number of tokens in context #[arg(long = "n-tokens", value_name = "N", default_value = "4096")] diff --git a/llama-cpp-2/src/mtmd.rs b/llama-cpp-2/src/mtmd.rs index 6ff50054..71b21ef8 100644 --- a/llama-cpp-2/src/mtmd.rs +++ b/llama-cpp-2/src/mtmd.rs @@ -25,18 +25,18 @@ use crate::token::LlamaToken; /// let audio_chunk = MtmdInputChunkType::Audio; /// /// assert_eq!(text_chunk, MtmdInputChunkType::Text); -/// assert_eq!(text_chunk as u32, llama_cpp_sys_2::MTMD_INPUT_CHUNK_TYPE_TEXT); +/// assert_eq!(text_chunk, llama_cpp_sys_2::MTMD_INPUT_CHUNK_TYPE_TEXT.into()); /// assert_ne!(text_chunk, image_chunk); /// ``` #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[repr(u32)] pub enum MtmdInputChunkType { /// Text input chunk - Text = llama_cpp_sys_2::MTMD_INPUT_CHUNK_TYPE_TEXT, + Text = llama_cpp_sys_2::MTMD_INPUT_CHUNK_TYPE_TEXT as _, /// Image input chunk - Image = llama_cpp_sys_2::MTMD_INPUT_CHUNK_TYPE_IMAGE, + Image = llama_cpp_sys_2::MTMD_INPUT_CHUNK_TYPE_IMAGE as _, /// Audio input chunk - Audio = llama_cpp_sys_2::MTMD_INPUT_CHUNK_TYPE_AUDIO, + Audio = llama_cpp_sys_2::MTMD_INPUT_CHUNK_TYPE_AUDIO as _, } impl From for MtmdInputChunkType {