diff --git a/QEfficient/transformers/models/modeling_auto.py b/QEfficient/transformers/models/modeling_auto.py
index 16a809c96..d6c4a5e65 100644
--- a/QEfficient/transformers/models/modeling_auto.py
+++ b/QEfficient/transformers/models/modeling_auto.py
@@ -1126,17 +1126,14 @@ def compile(

         # if ccl_enabled is True read Compute-Context-Length lists
         if self.ccl_enabled:
-            if comp_ctx_lengths_prefill is None or comp_ctx_lengths_decode is None:
-                logger.warning(
-                    "Please set comp_ctx_lengths_prefill and comp_ctx_lengths_decode with a proper list of context lengths. Using non-CCL default model."
-                )
-            self.comp_ctx_lengths_prefill, self.comp_ctx_lengths_decode = process_ccl_specializations(
+            if comp_ctx_lengths_prefill is None and comp_ctx_lengths_decode is None:
+                logger.info("Auto-generating CCL-prefill and CCL-decode lists based on Context Length (CL).")
+                self.comp_ctx_lengths_prefill, self.comp_ctx_lengths_decode, ctx_len = process_ccl_specializations(
                     comp_ctx_lengths_prefill, comp_ctx_lengths_decode, ctx_len, prefill_seq_len
                 )
-            # For supporting VLLM and Disaggregated with CCL
-            if comp_ctx_lengths_prefill is not None or comp_ctx_lengths_decode is not None:
-                self.comp_ctx_lengths_prefill, self.comp_ctx_lengths_decode = process_ccl_specializations(
+            elif comp_ctx_lengths_prefill is not None or comp_ctx_lengths_decode is not None:
+                self.comp_ctx_lengths_prefill, self.comp_ctx_lengths_decode, ctx_len = process_ccl_specializations(
                     comp_ctx_lengths_prefill, comp_ctx_lengths_decode, ctx_len, prefill_seq_len
                 )
@@ -1774,17 +1771,14 @@ def compile(

         # if ccl_enabled is True read Compute-Context-Length lists
         if self.ccl_enabled:
-            if comp_ctx_lengths_prefill is None or comp_ctx_lengths_decode is None:
-                logger.warning(
-                    "Please set comp_ctx_lengths_prefill and comp_ctx_lengths_decode with a proper list of context lengths. Using non-CCL default model."
-                )
-            self.comp_ctx_lengths_prefill, self.comp_ctx_lengths_decode = process_ccl_specializations(
+            if comp_ctx_lengths_prefill is None and comp_ctx_lengths_decode is None:
+                logger.info("Auto-generating CCL-prefill and CCL-decode lists based on Context Length (CL).")
+                self.comp_ctx_lengths_prefill, self.comp_ctx_lengths_decode, ctx_len = process_ccl_specializations(
                     comp_ctx_lengths_prefill, comp_ctx_lengths_decode, ctx_len, prefill_seq_len
                 )
-            # For supporting VLLM and Disaggregated with CCL
-            if comp_ctx_lengths_prefill is not None or comp_ctx_lengths_decode is not None:
-                self.comp_ctx_lengths_prefill, self.comp_ctx_lengths_decode = process_ccl_specializations(
+            elif comp_ctx_lengths_prefill is not None or comp_ctx_lengths_decode is not None:
+                self.comp_ctx_lengths_prefill, self.comp_ctx_lengths_decode, ctx_len = process_ccl_specializations(
                     comp_ctx_lengths_prefill, comp_ctx_lengths_decode, ctx_len, prefill_seq_len
                 )
@@ -2873,16 +2867,13 @@ def compile(

         # if ccl_enabled is True read Compute-Context-Length lists
         if self.ccl_enabled:
-            if comp_ctx_lengths_prefill is None or comp_ctx_lengths_decode is None:
-                logger.warning(
-                    "Please set comp_ctx_lengths_prefill and comp_ctx_lengths_decode with a proper list of context lengths. Using non-CCL default model."
-                )
-            self.comp_ctx_lengths_prefill, self.comp_ctx_lengths_decode = process_ccl_specializations(
+            if comp_ctx_lengths_prefill is None and comp_ctx_lengths_decode is None:
+                logger.info("Auto-generating CCL-prefill and CCL-decode lists based on Context Length (CL).")
+                self.comp_ctx_lengths_prefill, self.comp_ctx_lengths_decode, ctx_len = process_ccl_specializations(
                     comp_ctx_lengths_prefill, comp_ctx_lengths_decode, ctx_len, prefill_seq_len
                 )
-            # For supporting VLLM and Disaggregated with CCL
-            if comp_ctx_lengths_prefill is not None or comp_ctx_lengths_decode is not None:
+            elif comp_ctx_lengths_prefill is not None or comp_ctx_lengths_decode is not None:
                 if isinstance(comp_ctx_lengths_prefill, str):
                     import ast
@@ -2897,7 +2888,7 @@ def compile(
                 self.comp_ctx_lengths_prefill = comp_ctx_lengths_prefill
                 self.comp_ctx_lengths_decode = comp_ctx_lengths_decode

-            self.comp_ctx_lengths_prefill, self.comp_ctx_lengths_decode = process_ccl_specializations(
+            self.comp_ctx_lengths_prefill, self.comp_ctx_lengths_decode, ctx_len = process_ccl_specializations(
                 self.comp_ctx_lengths_prefill, self.comp_ctx_lengths_decode, ctx_len, prefill_seq_len
             )
             # --- Validation ---
+ """ + if max_elements <= 0: + return [] + if not sorted_seq: + return [last_value][:max_elements] + if sorted_seq[-1] != last_value: + if len(sorted_seq) < max_elements: + sorted_seq.append(last_value) + else: + sorted_seq[-1] = last_value + return sorted_seq[:max_elements] + + +def automatic_ccl_generation( + ctx_len: int, + prefill_seq_len: int, + comp_ctx_lengths_prefill: Optional[List[int]] = None, + comp_ctx_lengths_decode: Optional[List[int]] = None, +) -> Tuple[List[int], List[int], int]: + """ + Automatic Compute-Context-Length Lists Generation + + Purpose: + Compute decode and prefill CCL lists based on an input context length (CL), + prefill sequence length, and optional pre-specified lists. + + High-level rules (unchanged from your finalized logic): + - prefill_seq_len > 1: + * If either list is provided, pass them through unchanged. + * decode: doubles from tiered start; MUST end at mapped_CL (last forced to mapped_CL). + * prefill: + • If CL is power of two: STRICT doubling from tiered start, bounded by CL (no forced non-doubling last). + • Else: doubles from tiered start, bounded by CL, and last element = floor_to_1000(mapped_CL). + * Max 5 elements per list. + - prefill_seq_len == 1: + * decode and prefill are IDENTICAL. + * start at 4096, double up to 10 elements. + * upper grid cap computed dynamically (start * 2^(max_elements-1)); last = mapped_CL. + * If mapped_CL < 4096, both lists are [mapped_CL]. + """ + # Handle non-positive CL + if ctx_len <= 0: + mapped_cl = next_multiple_of_1024(1) + seq = [mapped_cl] + return seq, seq, mapped_cl + + mapped_cl = next_multiple_of_1024(ctx_len) + + # Early small-ctx_len case for identical lists + if mapped_cl <= 4096: + seq = [mapped_cl] + return seq, seq, mapped_cl + + # Compute tier starts via band index (no hard-coded chain) + idx = band_index_from_mapped_cl(mapped_cl) + decode_start = 4096 << idx # 4096, 8192, 16384 + PREFILL_STARTS = {0: 4000, 1: 8000, 2: 16000} + prefill_start = PREFILL_STARTS[idx] + + # Branch: prefill_seq_len > 1 + if prefill_seq_len > 1: + # Passthrough if either provided + if comp_ctx_lengths_decode is not None or comp_ctx_lengths_prefill is not None: + return ( + comp_ctx_lengths_prefill if comp_ctx_lengths_prefill is not None else [], + comp_ctx_lengths_decode if comp_ctx_lengths_decode is not None else [], + mapped_cl, + ) + + # Due to limitations in the number of specializations during compilation, we set the maximum number of elements in comp_ctx_lengths_decode and comp_ctx_lengths_prefill lists to 5. 
+def automatic_ccl_generation(
+    ctx_len: int,
+    prefill_seq_len: int,
+    comp_ctx_lengths_prefill: Optional[List[int]] = None,
+    comp_ctx_lengths_decode: Optional[List[int]] = None,
+) -> Tuple[List[int], List[int], int]:
+    """
+    Automatic Compute-Context-Length Lists Generation
+
+    Purpose:
+        Compute decode and prefill CCL lists based on an input context length (CL),
+        prefill sequence length, and optional pre-specified lists.
+
+    High-level rules:
+    - prefill_seq_len > 1:
+        * If either list is provided, pass them through unchanged.
+        * decode: doubles from tiered start; MUST end at mapped_CL (last forced to mapped_CL).
+        * prefill:
+            • If CL is a power of two: STRICT doubling from tiered start, bounded by CL (no forced non-doubling last).
+            • Else: doubles from tiered start, bounded by CL, and last element = floor_to_1000(mapped_CL).
+        * Max 5 elements per list.
+    - prefill_seq_len == 1:
+        * decode and prefill are IDENTICAL.
+        * start at 4096, double up to 10 elements.
+        * upper grid cap computed dynamically (start * 2^(max_elements-1)); last = mapped_CL.
+        * If mapped_CL <= 4096, both lists are [mapped_CL].
+    """
+    # Handle non-positive CL
+    if ctx_len <= 0:
+        mapped_cl = next_multiple_of_1024(1)
+        seq = [mapped_cl]
+        return seq, seq, mapped_cl
+
+    mapped_cl = next_multiple_of_1024(ctx_len)
+
+    # Early small-ctx_len case for identical lists
+    if mapped_cl <= 4096:
+        seq = [mapped_cl]
+        return seq, seq, mapped_cl
+
+    # Compute tier starts via band index (no hard-coded chain)
+    idx = band_index_from_mapped_cl(mapped_cl)
+    decode_start = 4096 << idx  # 4096, 8192, 16384
+    PREFILL_STARTS = {0: 4000, 1: 8000, 2: 16000}
+    prefill_start = PREFILL_STARTS[idx]
+
+    # Branch: prefill_seq_len > 1
+    if prefill_seq_len > 1:
+        # Pass the lists through unchanged if either one was provided
+        if comp_ctx_lengths_decode is not None or comp_ctx_lengths_prefill is not None:
+            return (
+                comp_ctx_lengths_prefill if comp_ctx_lengths_prefill is not None else [],
+                comp_ctx_lengths_decode if comp_ctx_lengths_decode is not None else [],
+                mapped_cl,
+            )
+
+        # Due to limitations on the number of specializations during compilation, the
+        # comp_ctx_lengths_decode and comp_ctx_lengths_prefill lists hold at most 5 elements.
+        max_elems = 5
+
+        # ---- Decode: strict doubling up to mapped_cl, then enforce last = mapped_cl
+        decode_set = build_doubling_set(start=decode_start, limit=mapped_cl, max_elements=max_elems)
+        decode_list = sorted(decode_set)
+        decode_list = ensure_last(decode_list, last_value=mapped_cl, max_elements=max_elems)
+
+        # ---- Prefill:
+        if is_power_of_two(ctx_len):
+            # STRICT doubling only, bounded by ctx_len; do NOT force a non-doubling last
+            prefill_set = build_doubling_set(start=prefill_start, limit=ctx_len, max_elements=max_elems)
+            prefill_list = sorted(prefill_set)[:max_elems]
+        else:
+            # Doubles bounded by ctx_len, but last must equal floor_to_1000(mapped_cl)
+            prefill_last = floor_to_1000(mapped_cl)
+            prefill_set = build_doubling_set(start=prefill_start, limit=ctx_len, max_elements=max_elems)
+            prefill_list = sorted(prefill_set)
+            prefill_list = ensure_last(prefill_list, last_value=prefill_last, max_elements=max_elems)
+
+        # NOTE: return order is the prefill list first, then the decode list
+        return prefill_list, decode_list, mapped_cl
+
+    # Branch: prefill_seq_len == 1 → identical lists
+    else:
+        # When prefill_seq_len=1 (e.g., in MoE models), prefill and decode can use the same
+        # specializations, so the CCL lists can be twice as long. Due to limitations on the
+        # number of specializations during compilation, the maximum list length is 10.
+        max_elems = 10
+        start_identical = 4096
+
+        if mapped_cl < start_identical:
+            seq = [mapped_cl]
+            return seq, seq, mapped_cl
+
+        # Dynamic grid cap: start * 2^(max_elems - 1)
+        grid_cap = start_identical * (1 << (max_elems - 1))
+        limit = min(mapped_cl, grid_cap)
+
+        seq_set = build_doubling_set(start=start_identical, limit=limit, max_elements=max_elems)
+        seq_list = sorted(seq_set)
+        seq_list = ensure_last(seq_list, last_value=mapped_cl, max_elements=max_elems)
+
+        return seq_list, seq_list, mapped_cl
+
+
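To make the tiering concrete, here is a worked trace of `automatic_ccl_generation` for a non-power-of-two and a power-of-two context length; the expected values follow from the rules above:

```python
from QEfficient.utils.check_ccl_specializations import automatic_ccl_generation

# ctx_len=10000 maps to 10240 (next multiple of 1024), which falls in band 0,
# so decode doubles from 4096 and prefill doubles from 4000.
prefill, decode, mapped_cl = automatic_ccl_generation(ctx_len=10000, prefill_seq_len=128)

# Decode ends at the mapped context length; prefill, since 10000 is not a power
# of two, ends at floor_to_1000(10240) = 10000.
assert mapped_cl == 10240
assert decode == [4096, 8192, 10240]
assert prefill == [4000, 8000, 10000]

# For a power-of-two ctx_len, prefill is strict doubling only (no forced last):
prefill_pow2, decode_pow2, _ = automatic_ccl_generation(ctx_len=8192, prefill_seq_len=128)
assert prefill_pow2 == [4000, 8000]
assert decode_pow2 == [4096, 8192]
```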
 def process_ccl_specializations(ccl_prefill, ccl_decode, ctx_len, prefill_seq_len):
-    if ccl_prefill is None or ccl_decode is None:
-        return None, None
-
-    if ctx_len is None:
-        raise TypeError("`ctx_len` is required when loading the model with CCL.")
-
-    if prefill_seq_len == 1:
-        # both prefill and decode ccl can share the same specializations since prefill_seq_len=1. So, a sorted union of both lists can be used for both of them.
-        ccl_union_all = sorted(set(ccl_prefill + ccl_decode))
-        ccl_union_all = [min(x, ctx_len) for x in ccl_union_all]
-        return ccl_union_all, ccl_union_all
-
-    # Step 1: Cap values to ctx_len
-    ccl_prefill = [min(x, ctx_len) for x in ccl_prefill]
-    ccl_decode = [min(x, ctx_len) for x in ccl_decode]
-
-    # Step 2: Remove duplicates within each list
-    ccl_prefill = list(set(ccl_prefill))
-    ccl_decode = list(set(ccl_decode))
-
-    # Step 3: Ensure no overlap between ccl_prefill and ccl_decode
-    updated_prefill = []
-    for val in ccl_prefill:
-        while val in ccl_decode or val in updated_prefill:
-            val -= 1
-            if val < 0:
-                break  # Prevent negative values
-        if val >= 0:
-            updated_prefill.append(val)
-
-    # Step 4: Sort both lists
-    updated_prefill.sort()
-    ccl_decode.sort()
-
-    return updated_prefill, ccl_decode
+    # Automatic CCL generation: if both ccl_prefill and ccl_decode are None,
+    # generate optimized context length lists for prefill and decode based on ctx_len
+    if ccl_prefill is None and ccl_decode is None:
+        ccl_prefill, ccl_decode, ctx_len = automatic_ccl_generation(ctx_len, prefill_seq_len, ccl_prefill, ccl_decode)
+    else:
+        if prefill_seq_len == 1:
+            if ccl_prefill is not None and ccl_decode is not None:
+                # both prefill and decode ccl can share the same specializations since prefill_seq_len=1. So, a sorted union of both lists can be used for both of them.
+                ccl_union_all = sorted(set(ccl_prefill + ccl_decode))
+                ccl_union_all = [min(x, ctx_len) for x in ccl_union_all]
+                ccl_prefill = ccl_union_all
+                ccl_decode = ccl_union_all
+        else:
+            # Step 1: Cap values to ctx_len
+            ccl_prefill = [min(x, ctx_len) for x in ccl_prefill] if ccl_prefill is not None else None
+            ccl_decode = [min(x, ctx_len) for x in ccl_decode] if ccl_decode is not None else None
+
+            # Step 2: Remove duplicates within each list
+            ccl_prefill = list(set(ccl_prefill)) if ccl_prefill is not None else None
+            ccl_decode = list(set(ccl_decode)) if ccl_decode is not None else None
+
+            if ccl_prefill is None or ccl_decode is None:
+                if ccl_prefill:
+                    ccl_prefill.sort()
+                if ccl_decode:
+                    ccl_decode.sort()
+            else:
+                # Step 3: Ensure no overlap between ccl_prefill and ccl_decode
+                tmp_prefill = ccl_prefill
+                ccl_prefill = []
+                for val in tmp_prefill:
+                    while val in ccl_decode or val in ccl_prefill:
+                        val -= 1
+                        if val < 0:
+                            break  # Prevent negative values
+                    if val >= 0:
+                        ccl_prefill.append(val)
+
+                # Step 4: Sort both lists
+                ccl_prefill.sort()
+                ccl_decode.sort()
+
+    print("CCL Configuration:")
+    print(f" - Prefill context lengths: {ccl_prefill}")
+    print(f" - Decode context lengths: {ccl_decode}")
+    print(f" - Max context length: {ctx_len}")
+    return ccl_prefill, ccl_decode, ctx_len
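The user-supplied path keeps its old behavior: cap to ctx_len, deduplicate, then nudge prefill values downward until the two lists are disjoint. A small trace of the overlap handling:

```python
from QEfficient.utils.check_ccl_specializations import process_ccl_specializations

# The lists overlap at 6144; the prefill copy is decremented to 6143 so the
# prefill and decode specializations stay disjoint, and both come back sorted.
prefill, decode, ctx_len = process_ccl_specializations(
    ccl_prefill=[4096, 6144],
    ccl_decode=[6144, 8192],
    ctx_len=8192,
    prefill_seq_len=128,
)
assert prefill == [4096, 6143]
assert decode == [6144, 8192]
assert ctx_len == 8192  # unchanged when the caller supplies the lists
```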
diff --git a/examples/performance/compute_context_length/README.md b/examples/performance/compute_context_length/README.md
index 9f1d29b9a..2115251e2 100644
--- a/examples/performance/compute_context_length/README.md
+++ b/examples/performance/compute_context_length/README.md
@@ -37,11 +37,22 @@ python basic_inference.py \
     --model-name meta-llama/Llama-3.2-1B \
     --prompt "Hello, how are you?" \
     --ctx-len 1024 \
+    --ccl-enabled \
     --comp-ctx-lengths-prefill "256,500" \
     --comp-ctx-lengths-decode "512,1024" \
     --generation-len 100
 ```

+# For automatic CCL list generation, simply omit the CCL lists and pass only the --ccl-enabled flag
+```bash
+python basic_inference.py \
+    --model-name meta-llama/Llama-3.2-1B \
+    --prompt "Hello, how are you?" \
+    --ctx-len 1024 \
+    --ccl-enabled \
+    --generation-len 100
+```
+
 ### Vision-Language Models

 Run VLM inference with CCL:
@@ -55,11 +66,22 @@ python vlm_inference.py \
     --model-name meta-llama/Llama-3.2-11B-Vision-Instruct \
     --query "Describe this image" \
     --image-url "https://..." \
+    --ccl-enabled \
     --comp-ctx-lengths-prefill "4096" \
     --comp-ctx-lengths-decode "6144,8192" \
     --ctx-len 8192
 ```

+# For automatic CCL list generation, simply omit the CCL lists and pass only the --ccl-enabled flag
+```bash
+python vlm_inference.py \
+    --model-name meta-llama/Llama-3.2-11B-Vision-Instruct \
+    --query "Describe this image" \
+    --image-url "https://..." \
+    --ccl-enabled \
+    --ctx-len 8192
+```
+
 ## Available Examples

 ### Text-Only Models
diff --git a/examples/performance/compute_context_length/basic_inference.py b/examples/performance/compute_context_length/basic_inference.py
index 4533c47e8..6e8c045fb 100644
--- a/examples/performance/compute_context_length/basic_inference.py
+++ b/examples/performance/compute_context_length/basic_inference.py
@@ -54,13 +54,13 @@ def main():
     parser.add_argument(
         "--comp-ctx-lengths-prefill",
         type=lambda x: [int(i) for i in x.split(",")],
-        default="256,500",
+        default=None,
         help="Comma-separated list of context lengths for prefill phase (e.g., '256,500')",
     )
     parser.add_argument(
         "--comp-ctx-lengths-decode",
         type=lambda x: [int(i) for i in x.split(",")],
-        default="512,1024",
+        default=None,
         help="Comma-separated list of context lengths for decode phase (e.g., '512,1024')",
     )
     parser.add_argument(
@@ -107,11 +107,7 @@ def main():
     args = parser.parse_args()

     print(f"Loading model: {args.model_name}")
-    print("CCL Configuration:")
-    print(f" - Prefill context lengths: {args.comp_ctx_lengths_prefill}")
-    print(f" - Decode context lengths: {args.comp_ctx_lengths_decode}")
-    print(f" - Max context length: {args.ctx_len}")
-    print(f" - Continuous batching: {args.continuous_batching}")
+    print(f"Continuous batching: {args.continuous_batching}")

     # Load model with CCL configuration
     model = QEFFAutoModelForCausalLM.from_pretrained(
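Putting the pieces together, the automatic path needs only the ccl_enabled flag at load time. A minimal end-to-end sketch; the compile settings below (cores, devices, prefill length) are illustrative placeholders, not prescribed values:

```python
from transformers import AutoTokenizer

from QEfficient import QEFFAutoModelForCausalLM

model_name = "meta-llama/Llama-3.2-1B"

# ccl_enabled without explicit lists: compile() receives both lists as None and
# process_ccl_specializations falls through to automatic_ccl_generation.
model = QEFFAutoModelForCausalLM.from_pretrained(
    model_name,
    qaic_config={"ccl_enabled": True},
)
model.compile(
    prefill_seq_len=128,  # placeholder compile settings; tune for your target
    ctx_len=8192,
    num_cores=16,
    num_devices=1,
)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model.generate(prompts=["Hello, how are you?"], tokenizer=tokenizer)
```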
diff --git a/examples/performance/compute_context_length/gemma3.py b/examples/performance/compute_context_length/gemma3.py
index d9672b9e3..1dcec5c81 100644
--- a/examples/performance/compute_context_length/gemma3.py
+++ b/examples/performance/compute_context_length/gemma3.py
@@ -21,14 +21,16 @@
 processor = AutoProcessor.from_pretrained(model_id)

 ## Activate Compute-Context-Length (CCL) feature by setting ccl_enabled=True when loading the model with from_pretrained().
-## Use the optional comp_ctx_lengths argument to provide two lists of context lengths for the prefilling and decoding processes. If comp_ctx_lengths=None, the model will run with its default context length.
+## Use the optional comp_ctx_lengths_prefill and comp_ctx_lengths_decode to provide two lists of context lengths for the prefilling and decoding processes. If both are None, the lists will be generated automatically based on the context length.
 ## - The first list, comp_ctx_lengths_prefill, defines the compute-context-length values for the prefilling process.
 ## -- The process starts with the first value in the list and gradually increases the context length based on the position_id of the current prompt chunk.
 ## - The second list, comp_ctx_lengths_decode, defines the compute-context-length values for the decoding process.
 ## -- During decoding, the model selects an appropriate context length from the list based on the input prompt length and cache index.
-## -- It starts from the correct value in the list and increases the context length dynamically when the cache index exceeds the current threshold.
+## -- It starts from the correct value in the list and increases the context length dynamically when the generated token's cache index exceeds the current CCL value.

 ctx_len = 8192
+ccl_enabled = True
+# Two optional lists, comp_ctx_lengths_prefill and comp_ctx_lengths_decode, define CCL values for prefilling and decoding.
 comp_ctx_lengths_prefill = [3072]
 comp_ctx_lengths_decode = [4096, ctx_len]

@@ -40,7 +42,7 @@
     attn_implementation="eager",
     kv_offload=True,
     qaic_config={
-        "ccl_enabled": True,
+        "ccl_enabled": ccl_enabled,
     },
 )
diff --git a/examples/performance/compute_context_length/gpt_oss.py b/examples/performance/compute_context_length/gpt_oss.py
index 39a5d48ed..92bef9148 100644
--- a/examples/performance/compute_context_length/gpt_oss.py
+++ b/examples/performance/compute_context_length/gpt_oss.py
@@ -12,16 +12,17 @@
 model_id = "openai/gpt-oss-20b"  # weights are not required to convert to fp32

 ## Activate Compute-Context-Length (CCL) feature by setting ccl_enabled=True when loading the model with from_pretrained().
-## Use the optional comp_ctx_lengths argument to provide two lists of context lengths for the prefilling and decoding processes. If comp_ctx_lengths=None, the model will run with its default context length.
+## Use the optional comp_ctx_lengths_prefill and comp_ctx_lengths_decode to provide two lists of context lengths for the prefilling and decoding processes. If both are None, the lists will be generated automatically based on the context length.
 ## - The first list, comp_ctx_lengths_prefill, defines the compute-context-length values for the prefilling process.
 ## -- The process starts with the first value in the list and gradually increases the context length based on the position_id of the current prompt chunk.
 ## - The second list, comp_ctx_lengths_decode, defines the compute-context-length values for the decoding process.
 ## -- During decoding, the model selects an appropriate context length from the list based on the input prompt length and cache index.
-## -- It starts from the correct value in the list and increases the context length dynamically when the cache index exceeds the current threshold.
+## -- It starts from the correct value in the list and increases the context length dynamically when the generated token's cache index exceeds the current CCL value.

 ctx_len = 4096
+ccl_enabled = True
+# Two optional lists, comp_ctx_lengths_prefill and comp_ctx_lengths_decode, define CCL values for prefilling and decoding.
 # In moe models like gpt-oss, since prefill_seq_len=1 both comp_ctx_lengths_prefill and comp_ctx_lengths_decode can share similar lists.
-# Set the list of ccl during prefilling and decoding processes
 comp_ctx_lengths_prefill = comp_ctx_lengths_decode = [1024, ctx_len]

 qeff_model = QEFFAutoModelForCausalLM.from_pretrained(
diff --git a/examples/performance/compute_context_length/granite_vision.py b/examples/performance/compute_context_length/granite_vision.py
index 6dd38395c..ef5dc3a51 100644
--- a/examples/performance/compute_context_length/granite_vision.py
+++ b/examples/performance/compute_context_length/granite_vision.py
@@ -98,6 +98,7 @@ def run_model(
     num_devices = 4
     ctx_len = 8192
     ccl_enabled = True
+    # Two optional lists, comp_ctx_lengths_prefill and comp_ctx_lengths_decode, define CCL values for prefilling and decoding. If both are None, the lists will be generated automatically based on the context length.
     comp_ctx_lengths_prefill = [5500]
     comp_ctx_lengths_decode = [6144, ctx_len]
diff --git a/examples/performance/compute_context_length/internvl.py b/examples/performance/compute_context_length/internvl.py
index 19bcf4bc1..02e965e0d 100644
--- a/examples/performance/compute_context_length/internvl.py
+++ b/examples/performance/compute_context_length/internvl.py
@@ -263,6 +263,7 @@ def run_intern_on_aic(

     ctx_len = 8192
     ccl_enabled = True
+    # Two optional lists, comp_ctx_lengths_prefill and comp_ctx_lengths_decode, define CCL values for prefilling and decoding. If both are None, the lists will be generated automatically based on the context length.
     comp_ctx_lengths_prefill = [4096]
     comp_ctx_lengths_decode = [6144, ctx_len]
diff --git a/examples/performance/compute_context_length/llama4.py b/examples/performance/compute_context_length/llama4.py
index 8cdbd70a1..a867e1bd3 100644
--- a/examples/performance/compute_context_length/llama4.py
+++ b/examples/performance/compute_context_length/llama4.py
@@ -18,14 +18,16 @@
 config.vision_config.num_hidden_layers = 2

 ## Activate Compute-Context-Length (CCL) feature by setting ccl_enabled=True when loading the model with from_pretrained().
-## Use the optional comp_ctx_lengths argument to provide two lists of context lengths for the prefilling and decoding processes. If comp_ctx_lengths=None, the model will run with its default context length.
+## Use the optional comp_ctx_lengths_prefill and comp_ctx_lengths_decode to provide two lists of context lengths for the prefilling and decoding processes. If both are None, the lists will be generated automatically based on the context length.
 ## - The first list, comp_ctx_lengths_prefill, defines the compute-context-length values for the prefilling process.
 ## -- The process starts with the first value in the list and gradually increases the context length based on the position_id of the current prompt chunk.
 ## - The second list, comp_ctx_lengths_decode, defines the compute-context-length values for the decoding process.
 ## -- During decoding, the model selects an appropriate context length from the list based on the input prompt length and cache index.
-## -- It starts from the correct value in the list and increases the context length dynamically when the cache index exceeds the current threshold.
+## -- It starts from the correct value in the list and increases the context length dynamically when the generated token's cache index exceeds the current CCL value.

 ctx_len = 8192
+ccl_enabled = True
+# Two optional lists, comp_ctx_lengths_prefill and comp_ctx_lengths_decode, define CCL values for prefilling and decoding.
 # Set the list of ccl during prefilling process
 comp_ctx_lengths_prefill = [3072]
 # Set the list of ccl during decoding process
@@ -37,7 +39,7 @@
     kv_offload=True,
     config=config,
     qaic_config={
-        "ccl_enabled": True,
+        "ccl_enabled": ccl_enabled,
     },
 )
 tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
diff --git a/examples/performance/compute_context_length/llama4_cb.py b/examples/performance/compute_context_length/llama4_cb.py
index ffbbff67f..f97160693 100644
--- a/examples/performance/compute_context_length/llama4_cb.py
+++ b/examples/performance/compute_context_length/llama4_cb.py
@@ -20,14 +20,16 @@
 processor = AutoProcessor.from_pretrained(model_id)

 ## Activate Compute-Context-Length (CCL) feature by setting ccl_enabled=True when loading the model with from_pretrained().
-## Use the optional comp_ctx_lengths argument to provide two lists of context lengths for the prefilling and decoding processes. If comp_ctx_lengths=None, the model will run with its default context length.
+## Use the optional comp_ctx_lengths_prefill and comp_ctx_lengths_decode to provide two lists of context lengths for the prefilling and decoding processes. If both are None, the lists will be generated automatically based on the context length.
 ## - The first list, comp_ctx_lengths_prefill, defines the compute-context-length values for the prefilling process.
 ## -- The process starts with the first value in the list and gradually increases the context length based on the position_id of the current prompt chunk.
 ## - The second list, comp_ctx_lengths_decode, defines the compute-context-length values for the decoding process.
 ## -- During decoding, the model selects an appropriate context length from the list based on the input prompt length and cache index.
-## -- It starts from the correct value in the list and increases the context length dynamically when the cache index exceeds the current threshold.
+## -- It starts from the correct value in the list and increases the context length dynamically when the generated token's cache index exceeds the current CCL value.

 ctx_len = 4096
+ccl_enabled = True
+# Two optional lists, comp_ctx_lengths_prefill and comp_ctx_lengths_decode, define CCL values for prefilling and decoding.
 # Set the list of ccl during prefilling process
 comp_ctx_lengths_prefill = [3072]
 # Set the list of ccl during decoding process
@@ -42,7 +44,7 @@
     config=config,
     continuous_batching=True,
     qaic_config={
-        "ccl_enabled": True,
+        "ccl_enabled": ccl_enabled,
     },
 )
@@ -69,7 +71,7 @@
     kv_offload=True,
     config=config,
     qaic_config={
-        "ccl_enabled": True,
+        "ccl_enabled": ccl_enabled,
     },
 )
diff --git a/examples/performance/compute_context_length/llama4_multi_image.py b/examples/performance/compute_context_length/llama4_multi_image.py
index fd513fe45..314aa49b3 100644
--- a/examples/performance/compute_context_length/llama4_multi_image.py
+++ b/examples/performance/compute_context_length/llama4_multi_image.py
@@ -18,14 +18,16 @@
 config.vision_config.num_hidden_layers = 2

 ## Activate Compute-Context-Length (CCL) feature by setting ccl_enabled=True when loading the model with from_pretrained().
-## Use the optional comp_ctx_lengths argument to provide two lists of context lengths for the prefilling and decoding processes. If comp_ctx_lengths=None, the model will run with its default context length.
+## Use the optional comp_ctx_lengths_prefill and comp_ctx_lengths_decode to provide two lists of context lengths for the prefilling and decoding processes. If both are None, the lists will be generated automatically based on the context length.
 ## - The first list, comp_ctx_lengths_prefill, defines the compute-context-length values for the prefilling process.
 ## -- The process starts with the first value in the list and gradually increases the context length based on the position_id of the current prompt chunk.
 ## - The second list, comp_ctx_lengths_decode, defines the compute-context-length values for the decoding process.
 ## -- During decoding, the model selects an appropriate context length from the list based on the input prompt length and cache index.
-## -- It starts from the correct value in the list and increases the context length dynamically when the cache index exceeds the current threshold.
+## -- It starts from the correct value in the list and increases the context length dynamically when the generated token's cache index exceeds the current CCL value.

 ctx_len = 8192
+ccl_enabled = True
+# Two optional lists, comp_ctx_lengths_prefill and comp_ctx_lengths_decode, define CCL values for prefilling and decoding.
 # Set the list of ccl during prefilling process
 comp_ctx_lengths_prefill = [5376]
 # Set the list of ccl during decoding process
@@ -37,7 +39,7 @@
     kv_offload=True,
     config=config,
     qaic_config={
-        "ccl_enabled": True,
+        "ccl_enabled": ccl_enabled,
     },
 )
 tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
diff --git a/examples/performance/compute_context_length/mistral3.py b/examples/performance/compute_context_length/mistral3.py
index 3763fbcde..a773ddfd9 100644
--- a/examples/performance/compute_context_length/mistral3.py
+++ b/examples/performance/compute_context_length/mistral3.py
@@ -101,6 +101,7 @@ def run_model(
     num_cores = 16
     num_devices = 4
     ccl_enabled = True
+    # Two optional lists, comp_ctx_lengths_prefill and comp_ctx_lengths_decode, define CCL values for prefilling and decoding. If both are None, the lists will be generated automatically based on the context length.
     comp_ctx_lengths_prefill = [4096]
     comp_ctx_lengths_decode = [6144, ctx_len]
diff --git a/examples/performance/compute_context_length/molmo.py b/examples/performance/compute_context_length/molmo.py
index b5f1f50e6..8d773f5fe 100644
--- a/examples/performance/compute_context_length/molmo.py
+++ b/examples/performance/compute_context_length/molmo.py
@@ -19,15 +19,17 @@
 # config.num_hidden_layers = 2

 ## Activate Compute-Context-Length (CCL) feature by setting ccl_enabled=True when loading the model with from_pretrained().
-## Use the optional comp_ctx_lengths argument to provide two lists of context lengths for the prefilling and decoding processes. If comp_ctx_lengths=None, the model will run with its default context length.
+## Use the optional comp_ctx_lengths_prefill and comp_ctx_lengths_decode to provide two lists of context lengths for the prefilling and decoding processes. If both are None, the lists will be generated automatically based on the context length.
 ## - The first list, comp_ctx_lengths_prefill, defines the compute-context-length values for the prefilling process.
 ## -- The process starts with the first value in the list and gradually increases the context length based on the position_id of the current prompt chunk.
 ## - The second list, comp_ctx_lengths_decode, defines the compute-context-length values for the decoding process.
 ## -- During decoding, the model selects an appropriate context length from the list based on the input prompt length and cache index.
-## -- It starts from the correct value in the list and increases the context length dynamically when the cache index exceeds the current threshold.
+## -- It starts from the correct value in the list and increases the context length dynamically when the generated token's cache index exceeds the current CCL value.

 # load the model
 ctx_len = 8192
+ccl_enabled = True
+# Two optional lists, comp_ctx_lengths_prefill and comp_ctx_lengths_decode, define CCL values for prefilling and decoding.
 comp_ctx_lengths_prefill = [3072]  # None #
 comp_ctx_lengths_decode = [4096, 8192]  # None #

@@ -37,7 +39,7 @@
     trust_remote_code=True,
     config=config,
     qaic_config={
-        "ccl_enabled": True,
+        "ccl_enabled": ccl_enabled,
     },
 )
 tokenizer = transformers.AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
diff --git a/examples/performance/compute_context_length/qwen2_5_vl.py b/examples/performance/compute_context_length/qwen2_5_vl.py
index 20960b6a9..5a6818930 100644
--- a/examples/performance/compute_context_length/qwen2_5_vl.py
+++ b/examples/performance/compute_context_length/qwen2_5_vl.py
@@ -23,14 +23,16 @@
 config.text_config.num_hidden_layers = 2

 ## Activate Compute-Context-Length (CCL) feature by setting ccl_enabled=True when loading the model with from_pretrained().
-## Use the optional comp_ctx_lengths argument to provide two lists of context lengths for the prefilling and decoding processes. If comp_ctx_lengths=None, the model will run with its default context length.
+## Use the optional comp_ctx_lengths_prefill and comp_ctx_lengths_decode to provide two lists of context lengths for the prefilling and decoding processes. If both are None, the lists will be generated automatically based on the context length.
 ## - The first list, comp_ctx_lengths_prefill, defines the compute-context-length values for the prefilling process.
 ## -- The process starts with the first value in the list and gradually increases the context length based on the position_id of the current prompt chunk.
 ## - The second list, comp_ctx_lengths_decode, defines the compute-context-length values for the decoding process.
 ## -- During decoding, the model selects an appropriate context length from the list based on the input prompt length and cache index.
-## -- It starts from the correct value in the list and increases the context length dynamically when the cache index exceeds the current threshold.
+## -- It starts from the correct value in the list and increases the context length dynamically when the generated token's cache index exceeds the current CCL value.

 ctx_len = 8192
+ccl_enabled = True
+# Two optional lists, comp_ctx_lengths_prefill and comp_ctx_lengths_decode, define CCL values for prefilling and decoding.
 comp_ctx_lengths_prefill = [4096]  # None #
 comp_ctx_lengths_decode = [6144, ctx_len]  # None #

@@ -40,7 +42,7 @@
     kv_offload=True,
     config=config,
     qaic_config={
-        "ccl_enabled": True,
+        "ccl_enabled": ccl_enabled,
     },
 )
 tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
diff --git a/examples/performance/compute_context_length/qwen2_5_vl_cb.py b/examples/performance/compute_context_length/qwen2_5_vl_cb.py
index fc330e14e..c247a1e58 100644
--- a/examples/performance/compute_context_length/qwen2_5_vl_cb.py
+++ b/examples/performance/compute_context_length/qwen2_5_vl_cb.py
@@ -20,14 +20,16 @@
 config.text_config.num_hidden_layers = 4

 ## Activate Compute-Context-Length (CCL) feature by setting ccl_enabled=True when loading the model with from_pretrained().
-## Use the optional comp_ctx_lengths argument to provide two lists of context lengths for the prefilling and decoding processes. If comp_ctx_lengths=None, the model will run with its default context length.
+## Use the optional comp_ctx_lengths_prefill and comp_ctx_lengths_decode to provide two lists of context lengths for the prefilling and decoding processes. If both are None, the lists will be generated automatically based on the context length.
 ## - The first list, comp_ctx_lengths_prefill, defines the compute-context-length values for the prefilling process.
 ## -- The process starts with the first value in the list and gradually increases the context length based on the position_id of the current prompt chunk.
 ## - The second list, comp_ctx_lengths_decode, defines the compute-context-length values for the decoding process.
 ## -- During decoding, the model selects an appropriate context length from the list based on the input prompt length and cache index.
-## -- It starts from the correct value in the list and increases the context length dynamically when the cache index exceeds the current threshold.
+## -- It starts from the correct value in the list and increases the context length dynamically when the generated token's cache index exceeds the current CCL value.

 ctx_len = 8192
+ccl_enabled = True
+# Two optional lists, comp_ctx_lengths_prefill and comp_ctx_lengths_decode, define CCL values for prefilling and decoding.
 comp_ctx_lengths_prefill = [4096]
 comp_ctx_lengths_decode = [6144, ctx_len]

@@ -38,7 +40,7 @@
     config=config,
     continuous_batching=True,
     qaic_config={
-        "ccl_enabled": True,
+        "ccl_enabled": ccl_enabled,
     },
 )
 tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
diff --git a/examples/performance/compute_context_length/qwen3moe.py b/examples/performance/compute_context_length/qwen3moe.py
index b53a28362..93849fa5a 100644
--- a/examples/performance/compute_context_length/qwen3moe.py
+++ b/examples/performance/compute_context_length/qwen3moe.py
@@ -17,15 +17,17 @@
 """

 ## Activate Compute-Context-Length (CCL) feature by setting ccl_enabled=True when loading the model with from_pretrained().
-## Use the optional comp_ctx_lengths argument to provide two lists of context lengths for the prefilling and decoding processes. If comp_ctx_lengths=None, the model will run with its default context length.
+## Use the optional comp_ctx_lengths_prefill and comp_ctx_lengths_decode to provide two lists of context lengths for the prefilling and decoding processes. If both are None, the lists will be generated automatically based on the context length.
 ## - The first list, comp_ctx_lengths_prefill, defines the compute-context-length values for the prefilling process.
 ## -- The process starts with the first value in the list and gradually increases the context length based on the position_id of the current prompt chunk.
 ## - The second list, comp_ctx_lengths_decode, defines the compute-context-length values for the decoding process.
 ## -- During decoding, the model selects an appropriate context length from the list based on the input prompt length and cache index.
-## -- It starts from the correct value in the list and increases the context length dynamically when the cache index exceeds the current threshold.
+## -- It starts from the correct value in the list and increases the context length dynamically when the generated token's cache index exceeds the current CCL value.

 ctx_len = 1024
 prefill_seq_len = 1
+ccl_enabled = True
+# Two optional lists, comp_ctx_lengths_prefill and comp_ctx_lengths_decode, define CCL values for prefilling and decoding.
 # In moe models when compiling with prefill_seq_len=1 and non-continuous-batching mode, prefill and decode will share the same ccl specializations.
 comp_ctx_lengths_prefill = comp_ctx_lengths_decode = [256, 512, ctx_len]

@@ -33,7 +35,7 @@
     model_name,
     continuous_batching=False,
     qaic_config={
-        "ccl_enabled": True,
+        "ccl_enabled": ccl_enabled,
     },
 )

@@ -49,6 +51,5 @@
     comp_ctx_lengths_prefill=comp_ctx_lengths_prefill,
     comp_ctx_lengths_decode=comp_ctx_lengths_decode,
 )
-
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 exec_info = model.generate(prompts=Constants.INPUT_STR, tokenizer=tokenizer)
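For the prefill_seq_len=1 case exercised by the MoE examples above, the generator returns one shared list for both phases. A trace under the same module-path assumption as earlier:

```python
from QEfficient.utils.check_ccl_specializations import automatic_ccl_generation

# With prefill_seq_len=1 (MoE-style compilation), both phases share one list,
# doubling from 4096 up to the mapped context length (at most 10 entries).
prefill, decode, mapped_cl = automatic_ccl_generation(ctx_len=32768, prefill_seq_len=1)
assert prefill == decode == [4096, 8192, 16384, 32768]

# Small contexts collapse to a single entry at the mapped context length.
prefill, decode, mapped_cl = automatic_ccl_generation(ctx_len=1024, prefill_seq_len=1)
assert prefill == decode == [1024]
```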
diff --git a/examples/performance/compute_context_length/vlm_inference.py b/examples/performance/compute_context_length/vlm_inference.py
index 876daa3e6..294632fe3 100644
--- a/examples/performance/compute_context_length/vlm_inference.py
+++ b/examples/performance/compute_context_length/vlm_inference.py
@@ -58,10 +58,6 @@ def run_model(
     """
     print(f"Loading model: {model_name}")
     print(f"KV offload (Dual QPC mode): {kv_offload}")
-    print("CCL Configuration:")
-    print(f" - Prefill context lengths: {comp_ctx_lengths_prefill}")
-    print(f" - Decode context lengths: {comp_ctx_lengths_decode}")
-    print(f" - Max context length: {ctx_len}")

     ## STEP 1: Load the Processor and Model

@@ -186,13 +182,13 @@ def main():
     parser.add_argument(
         "--comp-ctx-lengths-prefill",
         type=lambda x: [int(i) for i in x.split(",")],
-        default="4096",
+        default=None,
         help="Comma-separated list of context lengths for prefill phase (e.g., '4096')",
     )
     parser.add_argument(
         "--comp-ctx-lengths-decode",
         type=lambda x: [int(i) for i in x.split(",")],
-        default="6144,8192",
+        default=None,
         help="Comma-separated list of context lengths for decode phase (e.g., '6144,8192')",
     )
     parser.add_argument(