Skip to content

Commit 65bc536

Browse files
authored
Fix DeepCompile benchmark script (#973)
* update description of versions for deepcompile
* fix deepcompile benchmark script

Signed-off-by: Masahiro Tanaka <[email protected]>

* fix benchmark for z1

Signed-off-by: Masahiro Tanaka <[email protected]>

* add options for deepcompile bench

Signed-off-by: Masahiro Tanaka <[email protected]>

---------

Signed-off-by: Masahiro Tanaka <[email protected]>
1 parent ce39bf0 commit 65bc536

File tree

6 files changed

+39
-16
lines changed

6 files changed

+39
-16
lines changed

benchmarks/deepcompile/run.sh

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
#!/bin/bash
22

3-
4-
NUM_NODES=${NUM_NODES:-$(wc -l < /job/hostfile)}
3+
NUM_NODES=${NUM_NODES:-1}
54
NGPUS_PER_NODE=${NGPUS_PER_NODE:-$(nvidia-smi --query-gpu=name --format=csv,noheader | wc -l)}
65
NUM_PROCESSES=$((${NUM_NODES} * ${NGPUS_PER_NODE}))
76

@@ -95,6 +94,14 @@ while [[ $# -gt 0 ]]; do
9594
EXTRA_OPTS="${EXTRA_OPTS} --num_layers $2"
9695
shift 2
9796
;;
97+
--attn-impl)
98+
EXTRA_OPTS="${EXTRA_OPTS} --attn_impl $2"
99+
shift 2
100+
;;
101+
--eval)
102+
EXTRA_OPTS="${EXTRA_OPTS} --eval"
103+
shift
104+
;;
98105
--debug-log)
99106
DEBUG_LOG=1
100107
shift
@@ -217,7 +224,7 @@ echo "Logging to ${LOG_FILE}"
217224
${HOME}/.local/bin/accelerate launch --main_process_ip ${HOST_IP} --main_process_port 12345 \
218225
--num_machines ${NUM_NODES} --num_processes ${NUM_PROCESSES} --machine_rank ${MACHINE_RANK} \
219226
--config_file configs/config.yaml \
220-
run_acc_lm.py \
227+
run_bench_lm.py \
221228
--model_name "${MODEL}" \
222229
--zero_stage ${ZERO_STAGE} \
223230
${GAS_OPTS} \

benchmarks/deepcompile/run_bench.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ DC_OPTS="--compile --deepcompile"
66
ACC_OPTS="--gradient-accumulation-steps 1"
77
AC_OPTS="--activation-checkpointing"
88

9+
export NUM_NODES=${NUM_NODES:-4}
10+
911
MODEL="meta-llama/Meta-Llama-3-70B-Instruct"
1012
BATCH_SIZE_OPTS=(1 2 4)
1113
SEQ_LENGTH_OPTS=(512 1024 2048)

benchmarks/deepcompile/run_bench_acc.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ COMPILE_OPTS="--compile"
55
N3Z_OPTS="--compile --deepcompile"
66
AC_OPTS="--activation-checkpointing"
77

8+
export NUM_NODES=${NUM_NODES:-4}
9+
810
MODEL="meta-llama/Meta-Llama-3-70B-Instruct"
911
BATCH_SIZE_OPTS=(1)
1012
SEQ_LENGTH_OPTS=(1024)

benchmarks/deepcompile/run_bench_lm.py

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,6 @@
1515

1616
from datasets.utils.logging import disable_progress_bar
1717

18-
from patch_phi3_moe import patch_phi3moe
19-
2018
def get_args():
2119
parser = argparse.ArgumentParser()
2220
parser.add_argument("--model_name", type=str, default="meta-llama/Llama-2-7b-hf")
@@ -27,6 +25,7 @@ def get_args():
2725
parser.add_argument("--max_grad_norm", type=float, default=1.0)
2826
parser.add_argument("--gradient_accumulation_steps", type=int, default=1)
2927
parser.add_argument("--activation_checkpointing", action="store_true")
28+
parser.add_argument("--eval", action="store_true")
3029
parser.add_argument("--dataset_name", type=str, default="timdettmers/openassistant-guanaco")
3130
parser.add_argument("--num_layers", type=int, default=0)
3231
parser.add_argument("--attn_impl", type=str, default="spda")
@@ -74,7 +73,7 @@ def main():
7473
args = get_args()
7574
print(args)
7675

77-
if "offload_adam_states" in args.passes:
76+
if args.passes is not None and "offload_adam_states" in args.passes:
7877
os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'max_split_size_mb:128'
7978

8079
if args.deterministic:
@@ -98,16 +97,13 @@ def main():
9897
model = AutoModelForCausalLM.from_pretrained(model_weight_path, trust_remote_code=True)
9998
else:
10099
if args.num_layers > 0:
101-
model_config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
100+
model_config = AutoConfig.from_pretrained(model_name, attn_implementation=args.attn_impl, trust_remote_code=True)
102101
print(f"num_hidden_layers: {model_config.num_hidden_layers} -> {args.num_layers}")
103102
model_config.num_hidden_layers = args.num_layers
104103
model = AutoModelForCausalLM.from_config(model_config, trust_remote_code=True)
105104
else:
106105
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
107106

108-
if patch_phi3moe(model) and accelerator.is_main_process:
109-
print("Patched Phi-3.5-MoE model")
110-
111107
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
112108

113109
if args.save_weights and accelerator.is_main_process:
@@ -149,7 +145,6 @@ def tokenize_function(examples):
149145
torch._dynamo.config.capture_dynamic_output_shape_ops = True
150146
torch._dynamo.config.capture_scalar_outputs = True
151147

152-
153148
if is_deepspeed:
154149
if args.compile:
155150
schedule = make_schedule(args.passes.split(","), warmup=5) if args.passes else None
@@ -185,10 +180,13 @@ def tokenize_function(examples):
185180
on_trace_ready=torch.profiler.tensorboard_trace_handler(prof_dir),
186181
) if do_profile else nullcontext()
187182

188-
# Training loop
189-
model.train()
190-
global_step = 0
183+
# Training
184+
if args.eval:
185+
model.eval()
186+
else:
187+
model.train()
191188

189+
global_step = 0
192190
iter_times = []
193191

194192
# See https://github.com/microsoft/DeepSpeed/issues/6793

benchmarks/deepcompile/run_bench_z1.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ DC_OPTS="--compile --deepcompile"
66
ACC_OPTS="--gradient-accumulation-steps 1"
77
AC_OPTS="--activation-checkpointing"
88

9+
export NUM_NODES=${NUM_NODES:-4}
10+
911
MODEL="meta-llama/Meta-Llama-3-8B-Instruct"
1012
BATCH_SIZE_OPTS=(1 2 4)
1113
SEQ_LENGTH_OPTS=(512 1024 2048)

benchmarks/deepcompile/run_multinode.sh

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,23 @@ echo $*
44

55
SCRIPT_DIR=$(dirname $(realpath $0))
66
HOST_IP=$(hostname -i)
7-
NUM_NODES=${NUM_NODES:-$(wc -l < /job/hostfile)}
7+
NUM_NODES=${NUM_NODES:-1}
8+
9+
# verify that NUM_NODES is a positive integer
10+
if ! [[ "$NUM_NODES" =~ ^[1-9][0-9]*$ ]]; then
11+
echo "Error: NUM_NODES must be a positive integer"
12+
exit 1
13+
fi
14+
15+
# check if NUM_NODES ==1 or hostfile_n${NUM_NODES} exists
16+
if [ ! -f hostfile_n${NUM_NODES} ] && [ "${NUM_NODES}" != "1" ]; then
17+
echo "Error: hostfile_n${NUM_NODES} does not exist"
18+
exit 1
19+
fi
820

921
if [ "${NUM_NODES}" == "1" ]; then
1022
# avoid dependency on pdsh when possible
1123
cd ${SCRIPT_DIR}; bash ./run.sh --host-ip ${HOST_IP} $*
1224
else
13-
ds_ssh -f hostfile_n${NUM_NODES} "cd ${SCRIPT_DIR}; bash ./run.sh --host-ip ${HOST_IP} $*"
25+
ds_ssh -f hostfile_n${NUM_NODES} "cd ${SCRIPT_DIR}; NUM_NODES=${NUM_NODES} bash ./run.sh --host-ip ${HOST_IP} $*"
1426
fi

0 commit comments

Comments (0)