@@ -1635,7 +1635,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     // that have no expert_gating_func model parameter set
                     hparams.expert_gating_func = LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX;
                 }
-                ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, false);
+                ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, 0.0f);

                 // (optional) temperature tuning - used by mistral-large
                 ml.get_key(LLM_KV_ATTENTION_TEMPERATURE_SCALE, hparams.f_attn_temp_scale, false);
@@ -2267,9 +2267,9 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 ml.get_key(LLM_KV_ATTENTION_TEMPERATURE_SCALE, hparams.f_attn_temp_scale, false);

-                ml.get_key(LLM_KV_ROPE_SCALING_YARN_BETA_FAST, hparams.yarn_beta_fast, false);
-                ml.get_key(LLM_KV_ROPE_SCALING_YARN_BETA_SLOW, hparams.yarn_beta_slow, false);
-                ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, false);
+                ml.get_key(LLM_KV_ROPE_SCALING_YARN_BETA_FAST, hparams.yarn_beta_fast, false);
+                ml.get_key(LLM_KV_ROPE_SCALING_YARN_BETA_SLOW, hparams.yarn_beta_slow, false);
+                ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, 0.0f);

                 // TODO: maybe add n_attn_temp_floor_scale as a separate KV?
                 if (hparams.f_attn_temp_scale != 0.0f) {
@@ -2289,6 +2289,23 @@ void llama_model::load_hparams(llama_model_loader & ml) {
         default: throw std::runtime_error("unsupported model architecture");
     }

+    // ref: https://github.com/huggingface/transformers/blob/6d00f6b0a5679c36510f203e4226e36f517c3032/src/transformers/modeling_rope_utils.py#L336-L348
+    if (hparams.rope_yarn_log_mul != 0.0f) {
+        const float factor = 1.0f / hparams.rope_freq_scale_train;
+
+        const float mscale          = 1.0f;
+        const float mscale_all_dims = hparams.rope_yarn_log_mul;
+
+        static auto get_mscale = [](float scale, float mscale) {
+            return scale <= 1.0f ? 1.0f : (0.1f * mscale * logf(scale) + 1.0f);
+        };
+
+        hparams.yarn_attn_factor = get_mscale(factor, mscale) / get_mscale(factor, mscale_all_dims);
+
+        LLAMA_LOG_WARN("%s: setting new yarn_attn_factor = %.4f (mscale == %.1f, mscale_all_dim = %.1f)\n",
+                __func__, hparams.yarn_attn_factor, mscale, mscale_all_dims);
+    }
+
     pimpl->n_bytes = ml.n_bytes;

     pimpl->desc_str = arch_name() + " " + type_name() + " " + ml.ftype_name();
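The new `yarn_attn_factor` block above takes the ratio of two DeepSeek-style mscale terms, with `mscale = 1.0` in the numerator and `mscale_all_dims = rope_yarn_log_mul` in the denominator, following the referenced transformers code. Below is a minimal standalone sketch of that arithmetic; the `rope_freq_scale_train` and `rope_yarn_log_mul` values are illustrative assumptions, not taken from any particular GGUF:

```cpp
// Standalone sketch of the mscale ratio computed in load_hparams() above.
// Input values are illustrative assumptions, not values from a real model.
#include <cmath>
#include <cstdio>

// DeepSeek-style YaRN magnitude scaling: 0.1 * mscale * ln(scale) + 1, clamped to 1 for scale <= 1
static float get_mscale(float scale, float mscale) {
    return scale <= 1.0f ? 1.0f : 0.1f * mscale * logf(scale) + 1.0f;
}

int main() {
    const float rope_freq_scale_train = 0.025f; // assumed: 40x context extension
    const float rope_yarn_log_mul     = 0.1f;   // assumed mscale_all_dim value from the GGUF

    const float factor          = 1.0f / rope_freq_scale_train; // 40.0
    const float mscale          = 1.0f;
    const float mscale_all_dims = rope_yarn_log_mul;

    const float yarn_attn_factor = get_mscale(factor, mscale) / get_mscale(factor, mscale_all_dims);

    // ~1.32 for these inputs: ln(40) ~= 3.689 -> 1.3689 / 1.0369
    printf("yarn_attn_factor = %.4f\n", yarn_attn_factor);
    return 0;
}
```

Note that the guard `hparams.rope_yarn_log_mul != 0.0f` means the correction is only applied when the KV supplies a non-zero mscale_all_dim.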
@@ -6794,6 +6811,7 @@ void llama_model::print_info() const {
     LLAMA_LOG_INFO("%s: freq_base_train  = %.1f\n", __func__, hparams.rope_freq_base_train);
     LLAMA_LOG_INFO("%s: freq_scale_train = %g\n",   __func__, hparams.rope_freq_scale_train);
     LLAMA_LOG_INFO("%s: n_ctx_orig_yarn  = %u\n",   __func__, hparams.n_ctx_orig_yarn);
+    LLAMA_LOG_INFO("%s: rope_yarn_log_mul= %.4f\n", __func__, hparams.rope_yarn_log_mul);
     LLAMA_LOG_INFO("%s: rope_finetuned   = %s\n",   __func__, hparams.rope_finetuned ? "yes" : "unknown");
     // MRoPE (Multi-axis Rotary Position Embedding) sections
     if (const auto & s = hparams.rope_sections; s[0] || s[1] || s[2] || s[3]) {
@@ -6857,7 +6875,6 @@ void llama_model::print_info() const {
         LLAMA_LOG_INFO("%s: expert_weights_scale = %.1f\n", __func__, hparams.expert_weights_scale);
         LLAMA_LOG_INFO("%s: expert_weights_norm  = %d\n",   __func__, hparams.expert_weights_norm);
         LLAMA_LOG_INFO("%s: expert_gating_func   = %s\n",   __func__, llama_expert_gating_func_name((llama_expert_gating_func_type) hparams.expert_gating_func));
-        LLAMA_LOG_INFO("%s: rope_yarn_log_mul    = %.4f\n", __func__, hparams.rope_yarn_log_mul);
     }

     if (arch == LLM_ARCH_QWEN2MOE) {