@@ -259,13 +259,15 @@ def parse_args():
259259
260260 tp_act = ArgumentHelper .tp (pt_group )
261261 cache_count_act = ArgumentHelper .cache_max_entry_count (pt_group )
262+ session_len_act = ArgumentHelper .session_len (pt_group )
262263 cache_block_seq_len_act = ArgumentHelper .cache_block_seq_len (pt_group )
263264 prefix_caching_act = ArgumentHelper .enable_prefix_caching (pt_group )
264265
265266 # turbomind engine args
266267 tb_group = parser .add_argument_group ('TurboMind engine argument' )
267268 tb_group ._group_actions .append (tp_act )
268269 tb_group ._group_actions .append (cache_count_act )
270+ tb_group ._group_actions .append (session_len_act )
269271 tb_group ._group_actions .append (cache_block_seq_len_act )
270272 tb_group ._group_actions .append (prefix_caching_act )
271273 ArgumentHelper .model_format (tb_group , default = 'hf' )
@@ -287,6 +289,7 @@ def main():
287289 max_batch_size = args .concurrency ,
288290 tp = args .tp ,
289291 cache_max_entry_count = args .cache_max_entry_count ,
292+ session_len = args .session_len ,
290293 cache_block_seq_len = args .cache_block_seq_len ,
291294 model_format = args .model_format ,
292295 quant_policy = args .quant_policy ,
@@ -298,6 +301,7 @@ def main():
298301 elif args .backend == 'pytorch' :
299302 engine_config = PytorchEngineConfig (
300303 cache_max_entry_count = args .cache_max_entry_count ,
304+ session_len = args .session_len ,
301305 block_size = args .cache_block_seq_len ,
302306 max_batch_size = args .concurrency ,
303307 tp = args .tp ,
0 commit comments