From 7ba906b7893ad94e03e75ffce7fd75e92723875c Mon Sep 17 00:00:00 2001 From: Nikolay Date: Thu, 30 Oct 2025 18:24:02 +0100 Subject: [PATCH 1/4] common params attempt #1 --- optimum/intel/openvino/configuration.py | 50 ++++++++++++++++++++----- 1 file changed, 40 insertions(+), 10 deletions(-) diff --git a/optimum/intel/openvino/configuration.py b/optimum/intel/openvino/configuration.py index 1bff6d2822..cd91bd3b55 100644 --- a/optimum/intel/openvino/configuration.py +++ b/optimum/intel/openvino/configuration.py @@ -100,7 +100,7 @@ class OVQuantizationMethod(str, Enum): "Qwen/Qwen-7B-Chat": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.6}, "Qwen/Qwen2.5-1.5B-Instruct": { "bits": 4, - "sym": False, + "sym": True, "group_size": 128, "ratio": 0.9, "dataset": "wikitext2", @@ -140,7 +140,7 @@ class OVQuantizationMethod(str, Enum): }, "Qwen/Qwen3-8B": { "bits": 4, - "sym": False, + "sym": True, "group_size": 128, "ratio": 1.0, "dataset": "wikitext2", @@ -233,8 +233,8 @@ class OVQuantizationMethod(str, Enum): }, "meta-llama/Meta-Llama-3.1-8B-Instruct": { "bits": 4, - "sym": False, - "group_size": 64, + "sym": True, + "group_size": 128, "ratio": 0.8, "quant_method": OVQuantizationMethod.AWQ, }, @@ -262,11 +262,42 @@ class OVQuantizationMethod(str, Enum): }, "microsoft/Phi-3.5-mini-instruct": { "bits": 4, - "sym": False, - "group_size": 64, + "sym": True, + "group_size": 128, "ratio": 1.0, "quant_method": OVQuantizationMethod.AWQ, }, + "microsoft/Phi-4-reasoning": { + "bits": 4, + "sym": True, + "group_size": 128, + "ratio": 1.0, + "quant_method": OVQuantizationMethod.AWQ, + }, + "google/gemma-3-4b-it": { + "bits": 4, + "sym": True, + "group_size": 128, + "ratio": 1.0, + }, + "Mistral-7B-Instruct-v0.2": { + "bits": 4, + "sym": True, + "group_size": 128, + "ratio": 1.0, + }, + "openbmb/MiniCPM4-0.5B": { + "bits": 4, + "sym": True, + "group_size": 128, + "ratio": 1.0, + }, + "openbmb/MiniCPM4-8B": { + "bits": 4, + "sym": True, + "group_size": 128, + "ratio": 1.0, + }, 
"microsoft/Phi-4-mini-instruct": { "bits": 4, "sym": False, @@ -290,10 +321,9 @@ class OVQuantizationMethod(str, Enum): }, "deepseek-ai/DeepSeek-R1-Distill-Llama-8B": { "bits": 4, - "sym": False, - "group_size": 64, - "ratio": 0.8, - "quant_method": OVQuantizationMethod.AWQ, + "sym": True, + "group_size": 128, + "ratio": 1, }, "microsoft/Phi-4-multimodal-instruct": { "quantization_configs": { From e34e54d90f9a61ff9dbc60304ce13fcc7ced3bb6 Mon Sep 17 00:00:00 2001 From: Nikita Savelyev Date: Fri, 31 Oct 2025 12:57:08 +0100 Subject: [PATCH 2/4] Update optimum/intel/openvino/configuration.py --- optimum/intel/openvino/configuration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/optimum/intel/openvino/configuration.py b/optimum/intel/openvino/configuration.py index cd91bd3b55..5db7a33f22 100644 --- a/optimum/intel/openvino/configuration.py +++ b/optimum/intel/openvino/configuration.py @@ -280,7 +280,7 @@ class OVQuantizationMethod(str, Enum): "group_size": 128, "ratio": 1.0, }, - "Mistral-7B-Instruct-v0.2": { + "mistralai/Mistral-7B-Instruct-v0.2": { "bits": 4, "sym": True, "group_size": 128, From e016eecb471286e0f68968a0a1a93c44aae9429b Mon Sep 17 00:00:00 2001 From: Nikolay Date: Fri, 14 Nov 2025 14:04:46 +0100 Subject: [PATCH 3/4] variant #1 with ratio 1.0 --- optimum/intel/openvino/configuration.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/optimum/intel/openvino/configuration.py b/optimum/intel/openvino/configuration.py index 5db7a33f22..0267fcbaa9 100644 --- a/optimum/intel/openvino/configuration.py +++ b/optimum/intel/openvino/configuration.py @@ -102,7 +102,7 @@ class OVQuantizationMethod(str, Enum): "bits": 4, "sym": True, "group_size": 128, - "ratio": 0.9, + "ratio": 1.0, "dataset": "wikitext2", "quant_method": OVQuantizationMethod.AWQ, "scale_estimation": True, @@ -235,7 +235,7 @@ class OVQuantizationMethod(str, Enum): "bits": 4, "sym": True, "group_size": 128, - "ratio": 0.8, + "ratio": 1.0, "quant_method": 
OVQuantizationMethod.AWQ, }, "meta-llama/Llama-3.2-1B-Instruct": { From 2c6e7d90259738bf6f4c8688dbb6d105d9cb1a1e Mon Sep 17 00:00:00 2001 From: Nikolay Date: Fri, 14 Nov 2025 14:11:38 +0100 Subject: [PATCH 4/4] variant #2 - backup int8_sym --- optimum/intel/openvino/configuration.py | 148 +++++++++++++----------- 1 file changed, 80 insertions(+), 68 deletions(-) diff --git a/optimum/intel/openvino/configuration.py b/optimum/intel/openvino/configuration.py index 0267fcbaa9..1c1073feaf 100644 --- a/optimum/intel/openvino/configuration.py +++ b/optimum/intel/openvino/configuration.py @@ -52,6 +52,86 @@ class OVQuantizationMethod(str, Enum): # Default configs for 4-bit weight quantization _DEFAULT_4BIT_WQ_CONFIGS = { + ############################################################# + "meta-llama/Meta-Llama-3.1-8B-Instruct": { + "bits": 4, + "sym": True, + "group_size": 128, + "ratio": 0.8, + "quant_method": OVQuantizationMethod.AWQ, + "backup_precision": "int8_sym", + }, + "microsoft/Phi-3.5-mini-instruct": { + "bits": 4, + "sym": True, + "group_size": 128, + "ratio": 1.0, + "quant_method": OVQuantizationMethod.AWQ, + "backup_precision": "int8_sym", + }, + "microsoft/Phi-4-reasoning": { + "bits": 4, + "sym": True, + "group_size": 128, + "ratio": 1.0, + "quant_method": OVQuantizationMethod.AWQ, + "backup_precision": "int8_sym", + }, + "Qwen/Qwen3-8B": { + "bits": 4, + "sym": True, + "group_size": 128, + "ratio": 1.0, + "dataset": "wikitext2", + "scale_estimation": True, + "backup_precision": "int8_sym", + }, + "Qwen/Qwen2.5-1.5B-Instruct": { + "bits": 4, + "sym": True, + "group_size": 128, + "ratio": 1.0, + "dataset": "wikitext2", + "quant_method": OVQuantizationMethod.AWQ, + "scale_estimation": True, + "backup_precision": "int8_sym", + }, + "deepseek-ai/DeepSeek-R1-Distill-Llama-8B": { + "bits": 4, + "sym": True, + "group_size": 128, + "ratio": 1.0, + "backup_precision": "int8_sym", + }, + "google/gemma-3-4b-it": { + "bits": 4, + "sym": True, + "group_size": 128, + 
"ratio": 1.0, + "backup_precision": "int8_sym", + }, + "openbmb/MiniCPM4-0.5B": { + "bits": 4, + "sym": True, + "group_size": 128, + "ratio": 1.0, + "backup_precision": "int8_sym", + }, + "openbmb/MiniCPM4-8B": { + "bits": 4, + "sym": True, + "group_size": 128, + "ratio": 1.0, + "backup_precision": "int8_sym", + }, + "mistralai/Mistral-7B-Instruct-v0.2": { + "bits": 4, + "sym": True, + "group_size": 128, + "ratio": 1.0, + "backup_precision": "int8_sym", + }, + ############################################################# "databricks/dolly-v2-3b": { "bits": 4, "sym": False, @@ -98,15 +178,6 @@ class OVQuantizationMethod(str, Enum): "pansophic/rocket-3B": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.8}, "THUDM/chatglm2-6b": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.72}, "Qwen/Qwen-7B-Chat": {"bits": 4, "sym": True, "group_size": 128, "ratio": 0.6}, - "Qwen/Qwen2.5-1.5B-Instruct": { - "bits": 4, - "sym": True, - "group_size": 128, - "ratio": 1.0, - "dataset": "wikitext2", - "quant_method": OVQuantizationMethod.AWQ, - "scale_estimation": True, - }, "Qwen/Qwen2.5-7B-Instruct": { "bits": 4, "sym": False, @@ -138,14 +209,6 @@ class OVQuantizationMethod(str, Enum): "ratio": 1.0, "quant_method": OVQuantizationMethod.AWQ, }, - "Qwen/Qwen3-8B": { - "bits": 4, - "sym": True, - "group_size": 128, - "ratio": 1.0, - "dataset": "wikitext2", - "scale_estimation": True, - }, "openlm-research/open_llama_3b": {"bits": 4, "sym": False, "group_size": 64, "all_layers": True}, "openlm-research/open_llama_3b_v2": { "bits": 4, @@ -231,13 +294,6 @@ class OVQuantizationMethod(str, Enum): "dataset": "wikitext2", "scale_estimation": True, }, - "meta-llama/Meta-Llama-3.1-8B-Instruct": { - "bits": 4, - "sym": True, - "group_size": 128, - "ratio": 1.0, - "quant_method": OVQuantizationMethod.AWQ, - }, "meta-llama/Llama-3.2-1B-Instruct": { "bits": 4, "sym": False, @@ -260,44 +316,6 @@ class OVQuantizationMethod(str, Enum): "dataset": "wikitext2", "scale_estimation": True, }, - 
"microsoft/Phi-3.5-mini-instruct": { - "bits": 4, - "sym": True, - "group_size": 128, - "ratio": 1.0, - "quant_method": OVQuantizationMethod.AWQ, - }, - "microsoft/Phi-4-reasoning": { - "bits": 4, - "sym": True, - "group_size": 128, - "ratio": 1.0, - "quant_method": OVQuantizationMethod.AWQ, - }, - "google/gemma-3-4b-it": { - "bits": 4, - "sym": True, - "group_size": 128, - "ratio": 1.0, - }, - "mistralai/Mistral-7B-Instruct-v0.2": { - "bits": 4, - "sym": True, - "group_size": 128, - "ratio": 1.0, - }, - "openbmb/MiniCPM4-0.5B": { - "bits": 4, - "sym": True, - "group_size": 128, - "ratio": 1.0, - }, - "openbmb/MiniCPM4-8B": { - "bits": 4, - "sym": True, - "group_size": 128, - "ratio": 1.0, - }, "microsoft/Phi-4-mini-instruct": { "bits": 4, "sym": False, @@ -319,12 +337,6 @@ class OVQuantizationMethod(str, Enum): "ratio": 1.0, "quant_method": OVQuantizationMethod.AWQ, }, - "deepseek-ai/DeepSeek-R1-Distill-Llama-8B": { - "bits": 4, - "sym": True, - "group_size": 128, - "ratio": 1, - }, "microsoft/Phi-4-multimodal-instruct": { "quantization_configs": { "lm_model": {