[ci] add free disk before build test whl package and add session_len args in benchmark script (#4136)

zhulinJulia24 · web-flow · commit efbba83475b4 · 2025-11-19T14:13:36.000+08:00
diff --git a/.github/workflows/api_eval.yml b/.github/workflows/api_eval.yml
@@ -64,6 +64,18 @@ jobs:
       DOCKER_TAG: cuda12.8
       OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }}
     steps:
+      - name: Free disk space
+        uses: jlumbroso/free-disk-space@v1.3.1  # pinned to a release tag instead of the mutable main branch
+        with:
+          # Setting tool-cache to "true" frees about 6 GB but might remove tools that are actually needed
+          tool-cache: false
+          docker-images: false
+          # The options below default to true; set any of them to "false" if this workflow needs that tooling
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: false
       - name: Checkout repository
         uses: actions/checkout@v3
         with:
diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
@@ -46,6 +46,18 @@ jobs:
       PLAT_NAME: manylinux2014_x86_64
       DOCKER_TAG: cuda12.8
     steps:
+      - name: Free disk space
+        uses: jlumbroso/free-disk-space@v1.3.1  # pinned to a release tag instead of the mutable main branch
+        with:
+          # Setting tool-cache to "true" frees about 6 GB but might remove tools that are actually needed
+          tool-cache: false
+          docker-images: false
+          # The options below default to true; set any of them to "false" if this workflow needs that tooling
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: false
       - name: Checkout repository
         uses: actions/checkout@v3
         with:
diff --git a/.github/workflows/daily_ete_test.yml b/.github/workflows/daily_ete_test.yml
@@ -66,6 +66,18 @@ jobs:
       PLAT_NAME: manylinux2014_x86_64
       DOCKER_TAG: cuda12.8
     steps:
+      - name: Free disk space
+        uses: jlumbroso/free-disk-space@v1.3.1  # pinned to a release tag instead of the mutable main branch
+        with:
+          # Setting tool-cache to "true" frees about 6 GB but might remove tools that are actually needed
+          tool-cache: false
+          docker-images: false
+          # The options below default to true; set any of them to "false" if this workflow needs that tooling
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: false
       - name: Checkout repository
         uses: actions/checkout@v3
         with:
diff --git a/.github/workflows/daily_ete_test_3090.yml b/.github/workflows/daily_ete_test_3090.yml
@@ -65,6 +65,18 @@ jobs:
       PLAT_NAME: manylinux2014_x86_64
       DOCKER_TAG: cuda12.4
     steps:
+      - name: Free disk space
+        uses: jlumbroso/free-disk-space@v1.3.1  # pinned to a release tag instead of the mutable main branch
+        with:
+          # Setting tool-cache to "true" frees about 6 GB but might remove tools that are actually needed
+          tool-cache: false
+          docker-images: false
+          # The options below default to true; set any of them to "false" if this workflow needs that tooling
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: false
       - name: Checkout repository
         uses: actions/checkout@v3
         with:
diff --git a/.github/workflows/daily_ete_test_5080.yml b/.github/workflows/daily_ete_test_5080.yml
@@ -65,6 +65,18 @@ jobs:
       PLAT_NAME: manylinux2014_x86_64
       DOCKER_TAG: cuda12.8
     steps:
+      - name: Free disk space
+        uses: jlumbroso/free-disk-space@v1.3.1  # pinned to a release tag instead of the mutable main branch
+        with:
+          # Setting tool-cache to "true" frees about 6 GB but might remove tools that are actually needed
+          tool-cache: false
+          docker-images: false
+          # The options below default to true; set any of them to "false" if this workflow needs that tooling
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: false
       - name: Checkout repository
         uses: actions/checkout@v3
         with:
diff --git a/.github/workflows/daily_ete_test_h800.yml b/.github/workflows/daily_ete_test_h800.yml
@@ -65,6 +65,18 @@ jobs:
       PLAT_NAME: manylinux2014_x86_64
       DOCKER_TAG: cuda12.8
     steps:
+      - name: Free disk space
+        uses: jlumbroso/free-disk-space@v1.3.1  # pinned to a release tag instead of the mutable main branch
+        with:
+          # Setting tool-cache to "true" frees about 6 GB but might remove tools that are actually needed
+          tool-cache: false
+          docker-images: false
+          # The options below default to true; set any of them to "false" if this workflow needs that tooling
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: false
       - name: Checkout repository
         uses: actions/checkout@v3
         with:
diff --git a/.github/workflows/evaluate.yml b/.github/workflows/evaluate.yml
@@ -56,6 +56,18 @@ jobs:
       DOCKER_TAG: cuda12.8
       OUTPUT_FOLDER: cuda12.8_dist_${{ github.run_id }}
     steps:
+      - name: Free disk space
+        uses: jlumbroso/free-disk-space@v1.3.1  # pinned to a release tag instead of the mutable main branch
+        with:
+          # Setting tool-cache to "true" frees about 6 GB but might remove tools that are actually needed
+          tool-cache: false
+          docker-images: false
+          # The options below default to true; set any of them to "false" if this workflow needs that tooling
+          android: true
+          dotnet: true
+          haskell: true
+          large-packages: true
+          swap-storage: false
       - name: Checkout repository
         uses: actions/checkout@v3
         with:
diff --git a/autotest/utils/benchmark_utils.py b/autotest/utils/benchmark_utils.py
@@ -105,12 +105,14 @@ def longtext_throughput_test(config,
     for input_len, out_len, num_prompts, case_name, concurrency in [(1, 32768, 20, '32k', None),
                                                                     (1, 65536, 10, '64k', None),
                                                                     (198000, 1024, 3, '198k', 1)]:
+        session_len = input_len + out_len
         csv_path = f'{benchmark_path}/longtext_{case_name}_1th.csv'
         benchmark_log = os.path.join(
             log_path, f'benchmark_longtext_throughput_{case_name}' + model.split('/')[1] + worker_id + '.log')
         cmd = ' '.join([
             command, '--dataset-name random', f'--random-input-len {input_len}', f'--random-output-len {out_len}',
-            f'--num-prompts {num_prompts}', '--stream-output', f'--csv {csv_path}'
+            f'--num-prompts {num_prompts}', '--stream-output', f'--session-len {session_len}', '--random-range-ratio 1',
+            f'--csv {csv_path}'
         ])
         if concurrency:
             cmd += f' --concurrency {concurrency}'
diff --git a/benchmark/profile_pipeline_api.py b/benchmark/profile_pipeline_api.py
@@ -259,13 +259,15 @@ def parse_args():
 
     tp_act = ArgumentHelper.tp(pt_group)
     cache_count_act = ArgumentHelper.cache_max_entry_count(pt_group)
+    session_len_act = ArgumentHelper.session_len(pt_group)
     cache_block_seq_len_act = ArgumentHelper.cache_block_seq_len(pt_group)
     prefix_caching_act = ArgumentHelper.enable_prefix_caching(pt_group)
 
     # turbomind engine args
     tb_group = parser.add_argument_group('TurboMind engine argument')
     tb_group._group_actions.append(tp_act)
     tb_group._group_actions.append(cache_count_act)
+    tb_group._group_actions.append(session_len_act)
     tb_group._group_actions.append(cache_block_seq_len_act)
     tb_group._group_actions.append(prefix_caching_act)
     ArgumentHelper.model_format(tb_group, default='hf')
@@ -287,6 +289,7 @@ def main():
             max_batch_size=args.concurrency,
             tp=args.tp,
             cache_max_entry_count=args.cache_max_entry_count,
+            session_len=args.session_len,
             cache_block_seq_len=args.cache_block_seq_len,
             model_format=args.model_format,
             quant_policy=args.quant_policy,
@@ -298,6 +301,7 @@ def main():
     elif args.backend == 'pytorch':
         engine_config = PytorchEngineConfig(
             cache_max_entry_count=args.cache_max_entry_count,
+            session_len=args.session_len,
             block_size=args.cache_block_seq_len,
             max_batch_size=args.concurrency,
             tp=args.tp,