
Commit 45930c9

cont : make deepseek2 consistent
1 parent 59b9e36 commit 45930c9

File tree

convert_hf_to_gguf.py
src/llama-model.cpp
src/models/deepseek2.cpp

3 files changed, +13 -3 lines


convert_hf_to_gguf.py

Lines changed: 5 additions & 1 deletion
@@ -7286,7 +7286,11 @@ def set_gguf_parameters(self):
             self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
             self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
             self.gguf_writer.add_rope_scaling_orig_ctx_len(rope_scaling["original_max_position_embeddings"])
-            self.gguf_writer.add_rope_scaling_yarn_log_mul(rope_scaling["mscale_all_dim"])
+
+            # [TAG_DEEPSEEK2_YARN_LOG_MUL_FIX]
+            # note: for legacy reasons, this is not consistent with the other usages of self.gguf_writer.add_rope_scaling_yarn_log_mul
+            # ref https://github.com/ggml-org/llama.cpp/pull/17945
+            self.gguf_writer.add_rope_scaling_yarn_log_mul(0.1 * rope_scaling["mscale_all_dim"])
 
     _experts: list[dict[str, Tensor]] | None = None
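
For context (not part of the diff): the converter now writes the pre-scaled value again, so GGUF files produced after this change carry the same yarn-log-mul number as files converted before PR 17945. A minimal Python sketch of the write side, with a made-up rope_scaling block for illustration:

# Hypothetical rope_scaling block, roughly what a DeepSeek-2-style HF config
# might contain; the numbers are illustrative, not taken from the commit.
rope_scaling = {
    "factor": 40.0,
    "original_max_position_embeddings": 4096,
    "mscale_all_dim": 0.707,
}

# Value passed to add_rope_scaling_yarn_log_mul: the legacy, pre-scaled form.
stored_yarn_log_mul = 0.1 * rope_scaling["mscale_all_dim"]  # 0.0707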

src/llama-model.cpp

Lines changed: 6 additions & 1 deletion
@@ -1635,7 +1635,12 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     // that have no expert_gating_func model parameter set
                     hparams.expert_gating_func = LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX;
                 }
-                ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, 0.0f);
+
+                if (ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, 0.0f)) {
+                    // [TAG_DEEPSEEK2_YARN_LOG_MUL_FIX]
+                    // cancel the factor from the convert script
+                    hparams.rope_yarn_log_mul /= 0.1f;
+                }
 
                 // (optional) temperature tuning - used by mistral-large
                 ml.get_key(LLM_KV_ATTENTION_TEMPERATURE_SCALE, hparams.f_attn_temp_scale, false);
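
On the loading side, the stored value is divided back by 0.1 when the key is present, so hparams.rope_yarn_log_mul ends up holding the raw mscale_all_dim again; when the key is absent the 0.0f default is kept untouched. A rough Python mirror of that logic (the dict-based lookup and key name are hypothetical stand-ins, not the real loader API):

def load_rope_yarn_log_mul(gguf_kv: dict) -> float:
    # Mirror of the C++ hunk above: undo the converter's 0.1 pre-scaling.
    stored = gguf_kv.get("yarn_log_mul")  # hypothetical key name
    if stored is None:
        return 0.0  # key absent: keep the default, nothing to cancel
    return stored / 0.1  # e.g. 0.0707 -> 0.707 (the original mscale_all_dim)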

src/models/deepseek2.cpp

Lines changed: 2 additions & 1 deletion
@@ -20,7 +20,8 @@ llm_build_deepseek2::llm_build_deepseek2(const llama_model & model, const llm_gr
 
     // We have to pre-scale kq_scale and attn_factor to make the YaRN RoPE work correctly.
     // See https://github.com/ggerganov/llama.cpp/discussions/7416 for detailed explanation.
-    const float mscale = attn_factor * (1.0f + hparams.rope_yarn_log_mul * logf(1.0f / freq_scale));
+    // And also: https://github.com/ggml-org/llama.cpp/pull/17945 [TAG_DEEPSEEK2_YARN_LOG_MUL_FIX]
+    const float mscale = attn_factor * (1.0f + 0.1f * hparams.rope_yarn_log_mul * logf(1.0f / freq_scale));
     const float kq_scale = 1.0f * mscale * mscale / sqrtf(float(n_embd_head_k));
 
     ggml_tensor * cur;
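
Taken together, the 0.1 written by the converter and the /0.1f applied by the loader cancel out, and the explicit 0.1f in the build-time formula restores the same effective mscale as before the change. A small Python sanity check of that round trip (all numeric inputs are illustrative, not values from the commit):

import math

mscale_all_dim = 0.707        # illustrative
freq_scale     = 1.0 / 40.0   # illustrative
attn_factor    = 1.0          # illustrative

stored = 0.1 * mscale_all_dim   # written by convert_hf_to_gguf.py
loaded = stored / 0.1           # recovered in llama_model::load_hparams

# new build-time formula in deepseek2.cpp
mscale_new = attn_factor * (1.0 + 0.1 * loaded * math.log(1.0 / freq_scale))
# old formula applied to the legacy on-disk value
mscale_old = attn_factor * (1.0 + stored * math.log(1.0 / freq_scale))

assert abs(mscale_new - mscale_old) < 1e-9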

0 commit comments
