diff --git a/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/compose.yaml b/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/compose.yaml
new file mode 100644
index 00000000..58dc0f5f
--- /dev/null
+++ b/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/compose.yaml
@@ -0,0 +1,8 @@
+services:
+  default:
+    network_mode: host
+    image: gcc:12
+    command: sleep infinity
+    working_dir: /workspace
+    x-init:
+      - preprocess.sh
diff --git a/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/config.json b/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/config.json
new file mode 100644
index 00000000..962c5f55
--- /dev/null
+++ b/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/config.json
@@ -0,0 +1,15 @@
+{
+  "instance_id": "cmu_15-213__bomb_lab",
+  "course_id": "cmu_15-213",
+  "timeout_minutes": 30,
+  "tags": [
+    "reverse-engineering",
+    "debugging",
+    "x86-64",
+    "binary-analysis"
+  ],
+  "artifacts": [
+    "solution.txt",
+    "bomb_output.txt"
+  ]
+}
diff --git a/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/evaluate.sh b/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/evaluate.sh
new file mode 100755
index 00000000..fd46c845
--- /dev/null
+++ b/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/evaluate.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# Grades the Bomb Lab task: checks the protected starter files, requires a
+# six-line solution.txt, runs the bomb on it and inspects the bomb's output.
+set -euo pipefail
+
+echo "=== Evaluating CMU 15-213 Bomb Lab ==="
+
+cd /workspace
+
+# Verify reference artifacts haven't been modified
+if [ -f /tmp/checksums/protected.sha256 ]; then
+  echo "Checking protected files"
+  if ! sha256sum -c /tmp/checksums/protected.sha256; then
+    echo "FAIL: Protected starter files were modified (bomb, bomb.c, README.bomb)"
+    exit 1
+  fi
+fi
+
+if [ ! -f solution.txt ]; then
+  echo "FAIL: solution.txt not found. Write six input lines (one per bomb phase)."
+  exit 1
+fi
+
+# wc -l counts newline characters, so the sixth line must end in a newline.
+line_count=$(wc -l < solution.txt || echo 0)
+if [ "$line_count" -lt 6 ]; then
+  echo "FAIL: solution.txt must contain at least six lines (one per phase)."
+  exit 1
+fi
+
+# Ensure the binary is executable
+chmod +x bomb
+
+echo "Running bomb with provided solution"
+# Per starter/bomb.c the bomb switches to stdin once solution.txt hits EOF;
+# feed it /dev/null so the run cannot block on input inherited from the
+# harness until the timeout expires.
+if ! timeout 120 ./bomb solution.txt < /dev/null > bomb_output.txt 2>&1; then
+  echo "FAIL: bomb execution failed or timed out"
+  cat bomb_output.txt || true
+  exit 1
+fi
+
+echo "Checking bomb output"
+if grep -q "BOOM!!!" bomb_output.txt; then
+  echo "FAIL: Bomb exploded."
+  cat bomb_output.txt
+  exit 1
+fi
+
+if grep -q "Congratulations! You've defused the bomb!" bomb_output.txt; then
+  echo "PASS: Bomb defused"
+  exit 0
+fi
+
+echo "FAIL: Bomb did not report success"
+cat bomb_output.txt
+exit 1
diff --git a/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/preprocess.sh b/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/preprocess.sh
new file mode 100755
index 00000000..d98ddb18
--- /dev/null
+++ b/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/preprocess.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# Prepares /workspace for the Bomb Lab task: checks the starter files,
+# installs debugging tools and records checksums of the protected files.
+set -euo pipefail
+
+echo "=== Setting up CMU 15-213 Bomb Lab ==="
+
+cd /workspace
+
+echo "Ensuring bomb assets are present"
+required_files=(bomb bomb.c README.bomb)
+for file in "${required_files[@]}"; do
+  if [ ! -f "$file" ]; then
+    echo "ERROR: Missing required starter file: $file"
+    exit 1
+  fi
+  echo " ✓ $file"
+done
+
+# Install debugging essentials (gcc:12 is minimal)
+echo "Installing debugging tools (gdb, binutils, procps, file)"
+apt-get update
+# Keep debconf from prompting: nothing can answer it if this runs unattended.
+DEBIAN_FRONTEND=noninteractive apt-get install -y gdb binutils procps file
+
+# Provide a working solution file if the agent wants to edit in place
+if [ ! -f solution.txt ]; then
+  touch solution.txt
+fi
+
+# Make sure the bomb binary is executable
+chmod +x bomb
+
+# Record checksums to protect reference artifacts
+mkdir -p /tmp/checksums
+sha256sum "${required_files[@]}" > /tmp/checksums/protected.sha256
+
+echo "Bomb Lab setup complete. Use gdb/objdump/strings to recover all six inputs and write them to solution.txt."
diff --git a/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/sol.sh b/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/sol.sh
new file mode 100755
index 00000000..aaf12921
--- /dev/null
+++ b/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/sol.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# Reference solution for the Bomb Lab task: writes the six defusing inputs
+# to solution.txt and confirms that the bomb reports success on them.
+set -euo pipefail
+
+cd /workspace
+
+cat > solution.txt <<'EOF_SOL'
+Border relations with Canada have never been better.
+1 2 4 8 16 32
+7 327
+7 0
+9on567
+4 3 2 1 6 5
+EOF_SOL
+
+chmod +x bomb
+# Per starter/bomb.c the bomb switches to stdin once the input file hits
+# EOF; give it an empty stdin rather than whatever this script inherited.
+./bomb solution.txt < /dev/null > /tmp/bomb_sol_output.txt
+grep -q "Congratulations! You've defused the bomb!" /tmp/bomb_sol_output.txt
+printf "Bomb lab reference solution produced:\n%s\n" "$(tail -n 5 /tmp/bomb_sol_output.txt)"
diff --git a/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/starter/README.bomb b/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/starter/README.bomb
new file mode 100644
index 00000000..93d456db
--- /dev/null
+++ b/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/starter/README.bomb
@@ -0,0 +1,71 @@
+#######################################################
+# CS:APP Bomb Lab
+# Directions to Instructors
+#
+# Copyright (c) 2003-2016, R. Bryant and D. O'Hallaron
+#
+#######################################################
+
+This directory contains the files that you will use to build and run
+the CS:APP Bomb Lab. The Bomb Lab teaches students principles of
+machine-level programs, as well as general debugger and reverse
+engineering skills.
+
+***********
+1. Overview
+***********
+
+----
+1.1. Binary Bombs
+----
+A "binary bomb" is a Linux executable C program that consists of six
+"phases." Each phase expects the student to enter a particular string
+on stdin. If the student enters the expected string, then that phase
+is "defused." Otherwise the bomb "explodes" by printing "BOOM!!!".
+The goal for the students is to defuse as many phases as possible.
+
+----
+1.2. 
Solving Binary Bombs +---- +In order to defuse the bomb, students must use a debugger, typically +gdb or ddd, to disassemble the binary and single-step through the +machine code in each phase. The idea is to understand what each +assembly statement does, and then use this knowledge to infer the +defusing string. Students earn points for defusing phases, and they +lose points (configurable by the instructor, but typically 1/2 point) +for each explosion. Thus, they quickly learn to set breakpoints before +each phase and the function that explodes the bomb. It's a great +lesson and forces them to learn to use a debugger. + + +******************* +2. Bomb Terminology +******************* + +LabID: Each instance (offering) of the lab is identified by a unique +name, e.g., "f12" or "s13", that the instructor chooses. Explosions and +defusions from bombs whose LabIDs are different from the current +LabID are ignored. The LabID must not have any spaces. + +BombID: Each bomb in a given instance of the lab has a unique +non-negative integer called the "bombID." + +Notifying Bomb: A bomb can be compiled with a NOTIFY option that +causes the bomb to send a message each time the student explodes or +defuses a phase. Such bombs are called "notifying bombs." + +Quiet Bomb: If compiled with the NONOTIFY option, then the bomb +doesn't send any messages when it explodes or is defused. Such bombs +are called "quiet bombs." + +We will also find it helpful to distinguish between custom and +generic bombs: + +Custom Bomb: A "custom bomb" has a BombID > 0, is associated with a +particular student, and can be either notifying or quiet. Custom +notifying bombs are constrained to run on a specific set of Linux +hosts determined by the instructor. On the other hand, custom quiet +bombs can run on any Linux host. + +Generic Bomb: A "generic bomb" has a BombID = 0, isn't associated with +any particular student, is quiet, and hence can run on any host. 
\ No newline at end of file diff --git a/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/starter/bomb b/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/starter/bomb new file mode 100755 index 00000000..f59281d2 Binary files /dev/null and b/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/starter/bomb differ diff --git a/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/starter/bomb.c b/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/starter/bomb.c new file mode 100644 index 00000000..5a39ab48 --- /dev/null +++ b/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/starter/bomb.c @@ -0,0 +1,115 @@ +/*************************************************************************** + * Dr. Evil's Insidious Bomb, Version 1.1 + * Copyright 2011, Dr. Evil Incorporated. All rights reserved. + * + * LICENSE: + * + * Dr. Evil Incorporated (the PERPETRATOR) hereby grants you (the + * VICTIM) explicit permission to use this bomb (the BOMB). This is a + * time limited license, which expires on the death of the VICTIM. + * The PERPETRATOR takes no responsibility for damage, frustration, + * insanity, bug-eyes, carpal-tunnel syndrome, loss of sleep, or other + * harm to the VICTIM. Unless the PERPETRATOR wants to take credit, + * that is. The VICTIM may not distribute this bomb source code to + * any enemies of the PERPETRATOR. No VICTIM may debug, + * reverse-engineer, run "strings" on, decompile, decrypt, or use any + * other technique to gain knowledge of and defuse the BOMB. BOMB + * proof clothing may not be worn when handling this program. The + * PERPETRATOR will not apologize for the PERPETRATOR's poor sense of + * humor. This license is null and void where the BOMB is prohibited + * by law. 
+ ***************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "support.h"
+#include "phases.h"
+
+/*
+ * Note to self: Remember to erase this file so my victims will have no
+ * idea what is going on, and so they will all blow up in a
+ * spectacularly fiendish explosion. -- Dr. Evil
+ */
+
+FILE *infile;
+
+int main(int argc, char *argv[])
+{
+    char *input;
+
+    /* Note to self: remember to port this bomb to Windows and put a
+     * fantastic GUI on it. */
+
+    /* When run with no arguments, the bomb reads its input lines
+     * from standard input. */
+    if (argc == 1) {
+        infile = stdin;
+    }
+
+    /* When run with one argument <file>, the bomb reads from <file>
+     * until EOF, and then switches to standard input. Thus, as you
+     * defuse each phase, you can add its defusing string to <file> and
+     * avoid having to retype it. */
+    else if (argc == 2) {
+        if (!(infile = fopen(argv[1], "r"))) {
+            printf("%s: Error: Couldn't open %s\n", argv[0], argv[1]);
+            exit(8);
+        }
+    }
+
+    /* You can't call the bomb with more than 1 command line argument. */
+    else {
+        printf("Usage: %s [<input_file>]\n", argv[0]);
+        exit(8);
+    }
+
+    /* Do all sorts of secret stuff that makes the bomb harder to defuse. */
+    initialize_bomb();
+
+    printf("Welcome to my fiendish little bomb. You have 6 phases with\n");
+    printf("which to blow yourself up. Have a nice day!\n");
+
+    /* Hmm... Six phases must be more secure than one phase! */
+    input = read_line();             /* Get input                   */
+    phase_1(input);                  /* Run the phase               */
+    phase_defused();                 /* Drat! They figured it out!
+                                      * Let me know how they did it. */
+    printf("Phase 1 defused. How about the next one?\n");
+
+    /* The second phase is harder. No one will ever figure out
+     * how to defuse this... */
+    input = read_line();
+    phase_2(input);
+    phase_defused();
+    printf("That's number 2. Keep going!\n");
+
+    /* I guess this is too easy so far. Some more complex code will
+     * confuse people. */
+    input = read_line();
+    phase_3(input);
+    phase_defused();
+    printf("Halfway there!\n");
+
+    /* Oh yeah? Well, how good is your math? Try on this saucy problem! */
+    input = read_line();
+    phase_4(input);
+    phase_defused();
+    printf("So you got that one. Try this one.\n");
+
+    /* Round and 'round in memory we go, where we stop, the bomb blows! */
+    input = read_line();
+    phase_5(input);
+    phase_defused();
+    printf("Good work! On to the next...\n");
+
+    /* This phase will never be used, since no one will get past the
+     * earlier ones. But just in case, make this one extra hard. */
+    input = read_line();
+    phase_6(input);
+    phase_defused();
+
+    /* Wow, they got it! But isn't something... missing? Perhaps
+     * something they overlooked? Mua ha ha ha ha! */
+
+    return 0;
+}
diff --git a/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/starter/task.md b/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/starter/task.md
new file mode 100644
index 00000000..61fa5890
--- /dev/null
+++ b/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/starter/task.md
@@ -0,0 +1,33 @@
+# CMU 15-213: Bomb Lab
+
+You are given a pre-built binary bomb. It has six phases. Each phase expects a specific input line; if any line is wrong the bomb explodes. Your job is to reverse engineer the binary and write the correct six lines to a file called `solution.txt`.
+
+The binary and supporting materials are in the starter directory:
+
+- `bomb`: the ELF64 binary bomb
+- `bomb.c`: the main driver (does not reveal the phase internals)
+- `README.bomb`: background about the lab
+
+## Task
+
+1. Work inside `/workspace`.
+2. Recover the correct input for each of the six phases of the bomb.
+3. Write the six inputs in order, one per line, to `solution.txt`.
+4. Do not modify the starter artifacts (`bomb`, `bomb.c`, `README.bomb`).
+
+The grading script will run `./bomb solution.txt` and expects the bomb to report success without ever printing `BOOM!!!`.
+ +## Useful commands + +- `strings bomb` to scan embedded text +- `objdump -d bomb | less` to disassemble +- `gdb ./bomb` to step through phases (set breakpoints on `phase_1`…`phase_6`) +- `./bomb` to run interactively during testing + +## What is evaluated + +- `solution.txt` exists and has at least six lines +- Starter files are unchanged +- `./bomb solution.txt` completes without exploding and prints "Congratulations! You've defused the bomb!" + +Secret phases are not required. Focus only on phases 1–6. \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/task.md b/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/task.md new file mode 100644 index 00000000..d00d7ee7 --- /dev/null +++ b/benchmarks/courselab_bench/data/cmu_15-213/task_bomb_lab/task.md @@ -0,0 +1,34 @@ +# CMU 15-213: Bomb Lab + +You are given a pre-built binary bomb. It has six phases. Each phase expects a specific input line; if any line is wrong the bomb explodes. Your job is to reverse engineer the binary and write the correct six lines to a file called `solution.txt`. + +The binary and supporting materials are in the starter directory: + +- `bomb`: the ELF64 binary bomb +- `bomb.c`: the main driver (does not reveal the phase internals) +- `README.bomb`: background about the lab +- `task.md`: this task description + +## Task + +1. Work inside `/workspace`. +2. Recover the correct input for each of the six phases of the bomb. +3. Write the six inputs in order, one per line, to `solution.txt`. +4. Do not modify the starter artifacts (`bomb`, `bomb.c`, `README.bomb`). + +The grading script will run `./bomb solution.txt` and expects the bomb to report success without ever printing `BOOM!!!`. 
+ +## Useful commands + +- `strings bomb` to scan embedded text +- `objdump -d bomb | less` to disassemble +- `gdb ./bomb` to step through phases (set breakpoints on `phase_1`…`phase_6`) +- `./bomb` to run interactively during testing + +## What is evaluated + +- `solution.txt` exists and has at least six lines +- Starter files are unchanged +- `./bomb solution.txt` completes without exploding and prints "Congratulations! You've defused the bomb!" + +Secret phases are not required. Focus only on phases 1–6. diff --git a/benchmarks/courselab_bench/data/courses.json b/benchmarks/courselab_bench/data/courses.json index 5a5bf845..6906deb3 100644 --- a/benchmarks/courselab_bench/data/courses.json +++ b/benchmarks/courselab_bench/data/courses.json @@ -26,7 +26,7 @@ "name": "CMU 15-213: Introduction to Computer Systems", "institution": "CMU", "year": 2024, - "num_tasks": 1 + "num_tasks": 2 } ] }