From 926b566578c5ac4ce07935fb100fbbbd0cb938f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Tue, 10 Jun 2025 13:49:36 +0200 Subject: [PATCH 1/2] fix duplicate key DeepSeek-R1 conversion error --- convert_hf_to_gguf.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 7b9893c8a3e10..8615aebe4c454 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -556,8 +556,11 @@ def set_gguf_parameters(self): logger.info(f"gguf: experts used count = {n_experts_used}") if (head_dim := self.hparams.get("head_dim")) is not None: - self.gguf_writer.add_key_length(head_dim) - self.gguf_writer.add_value_length(head_dim) + # Workaround for incorrect AutoConfig value for DeepSeekV3 (is set correctly in DeepSeekV2Model class) + # https://github.com/huggingface/transformers/blob/19224c3642705c5b6988c9f5f4251f83323d05ae/src/transformers/models/deepseek_v3/configuration_deepseek_v3.py#L210 + if self.hparams.get("qk_rope_head_dim") != head_dim: + self.gguf_writer.add_key_length(head_dim) + self.gguf_writer.add_value_length(head_dim) self.gguf_writer.add_file_type(self.ftype) logger.info(f"gguf: file type = {self.ftype}") From d6bd4e029b65d4aca0f71c6ede66a80d6fca46fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sigbj=C3=B8rn=20Skj=C3=A6ret?= Date: Tue, 10 Jun 2025 23:21:43 +0200 Subject: [PATCH 2/2] check model_type instead --- convert_hf_to_gguf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index 8615aebe4c454..13dcbdc3cf1fc 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -558,7 +558,7 @@ def set_gguf_parameters(self): if (head_dim := self.hparams.get("head_dim")) is not None: # Workaround for incorrect AutoConfig value for DeepSeekV3 (is set correctly in DeepSeekV2Model class) # https://github.com/huggingface/transformers/blob/19224c3642705c5b6988c9f5f4251f83323d05ae/src/transformers/models/deepseek_v3/configuration_deepseek_v3.py#L210 - if self.hparams.get("qk_rope_head_dim") != head_dim: + if self.hparams.get("model_type") != "deepseek_v3": self.gguf_writer.add_key_length(head_dim) self.gguf_writer.add_value_length(head_dim)