diff --git a/benchmarks/courselab_bench/courselab/courselab.py b/benchmarks/courselab_bench/courselab/courselab.py index c11a8a48..71fd2f12 100644 --- a/benchmarks/courselab_bench/courselab/courselab.py +++ b/benchmarks/courselab_bench/courselab/courselab.py @@ -1,13 +1,33 @@ from importlib.metadata import version from pathlib import Path +from typing import Any from inspect_ai import Task, task -from inspect_ai.agent import Agent -from inspect_ai.solver import Solver +from inspect_ai.agent import Agent, as_solver +from inspect_ai.solver import Solver, TaskState, solver from courselab.dataset import load_dataset, load_courses_metadata from courselab.scorer import lab_scorer -from courselab.solver import lab_agent +from courselab.solver import lab_agent, multi_round_solver + + +@solver +def adaptive_solver(base_agent: Agent) -> Solver: + """Solver that adapts to single-round or multi-round tasks.""" + # Convert agent to solver + agent_solver = as_solver(base_agent) + + async def solve(state: TaskState, generate: Any) -> TaskState: + # Check if this is a multi-round task + if state.metadata.get("multi_round", False): + # Use multi-round solver + multi_round = multi_round_solver(base_agent) + return await multi_round(state, generate) + else: + # Use base agent normally + return await agent_solver(state, generate) + + return solve @task @@ -20,9 +40,18 @@ def courselab( dataset = load_dataset(task_dir=task_dir, task_ids=task_ids) metadata = load_courses_metadata(data_dir=Path(task_dir) if task_dir else None) + # Use adaptive solver that handles both single and multi-round tasks + # If a custom solver is provided, use it directly (solution_agent is not affected) + if agent is not None and isinstance(agent, Solver): + solver_to_use = agent + else: + # Use default agent or provided agent, wrapped with adaptive solver + base_agent = agent if isinstance(agent, Agent) else lab_agent() + solver_to_use = adaptive_solver(base_agent) + return Task( dataset=dataset, - 
solver=agent or lab_agent(), + solver=solver_to_use, scorer=lab_scorer(), max_messages=max_turns, metadata={ diff --git a/benchmarks/courselab_bench/courselab/dataset.py b/benchmarks/courselab_bench/courselab/dataset.py index cda4ac37..cccf4393 100644 --- a/benchmarks/courselab_bench/courselab/dataset.py +++ b/benchmarks/courselab_bench/courselab/dataset.py @@ -29,7 +29,29 @@ def load_dataset( if task_ids and instance_id not in task_ids: continue - task_description = (task_folder / "task.md").read_text() + # Check for multi-round task (task1.md, task2.md, etc.) + multi_round = False + num_rounds = 0 + round_tasks = {} + + # Check if task1.md exists + if (task_folder / "task1.md").exists(): + multi_round = True + # Find all taskX.md files + for i in range(1, 100): # Support up to 99 rounds + task_file = task_folder / f"task{i}.md" + if task_file.exists(): + round_tasks[i] = task_file.read_text() + num_rounds = i + else: + break + + # Use single task.md if not multi-round + if not multi_round: + task_description = (task_folder / "task.md").read_text() + else: + # For multi-round tasks, use task1.md as initial input + task_description = round_tasks[1] preprocess_path = task_folder / "preprocess.sh" setup_script = preprocess_path.read_text() if preprocess_path.exists() else None @@ -55,17 +77,27 @@ def load_dataset( f"compose.yaml required but not found in {task_folder}" ) + metadata = { + "task_folder": str(task_folder.absolute()), + "course_id": config.get("course_id"), + "artifacts": config.get("artifacts", []), + "tags": config.get("tags", []), + } + + # Add multi-round information if applicable + if multi_round: + metadata["multi_round"] = True + metadata["num_rounds"] = num_rounds + metadata["round_tasks"] = round_tasks + else: + metadata["multi_round"] = False + samples.append( Sample( id=instance_id, input=task_description, target="success", - metadata={ - "task_folder": str(task_folder.absolute()), - "course_id": config.get("course_id"), - "artifacts": 
config.get("artifacts", []), - "tags": config.get("tags", []), - }, + metadata=metadata, sandbox=SandboxEnvironmentSpec( type="docker", config=str(compose_file.absolute()), diff --git a/benchmarks/courselab_bench/courselab/scorer.py b/benchmarks/courselab_bench/courselab/scorer.py index 86e6c83a..e0c13388 100644 --- a/benchmarks/courselab_bench/courselab/scorer.py +++ b/benchmarks/courselab_bench/courselab/scorer.py @@ -8,41 +8,120 @@ def lab_scorer() -> Scorer: async def score(state: TaskState, target: Target) -> Score: task_folder = Path(state.metadata["task_folder"]) - evaluate_script = (task_folder / "evaluate.sh").read_text() + + # Check if this is a multi-round task + if state.metadata.get("multi_round", False): + # Multi-round task: evaluate each round and take AND of results + num_rounds = state.metadata["num_rounds"] + round_results = [] + all_passed = True + combined_explanation = [] + + for round_num in range(1, num_rounds + 1): + evaluate_file = task_folder / f"evaluate{round_num}.sh" + + if not evaluate_file.exists(): + # If evaluateX.sh doesn't exist, skip this round + combined_explanation.append(f"Round {round_num}: evaluate{round_num}.sh not found, skipping") + continue + + evaluate_script = evaluate_file.read_text() + script_name = f"evaluate{round_num}.sh" + + await sandbox().write_file(script_name, evaluate_script) + await sandbox().exec(["chmod", "+x", script_name]) + + result = await sandbox().exec(["bash", script_name]) + + round_passed = result.success + all_passed = all_passed and round_passed + + round_results.append({ + "round": round_num, + "passed": round_passed, + "stdout": result.stdout, + "stderr": result.stderr, + }) + + combined_explanation.append( + f"Round {round_num}: {'PASS' if round_passed else 'FAIL'}\n" + f"{result.stdout}\n" + f"{result.stderr if result.stderr else ''}" + ) + + # Collect artifacts after all rounds + artifacts = {} + artifact_patterns = state.metadata.get("artifacts", []) + if artifact_patterns: + for pattern 
in artifact_patterns: + glob_result = await sandbox().exec( + ["bash", "-c", f"ls {pattern} 2>/dev/null || true"] + ) + if glob_result.success and glob_result.stdout.strip(): + for file_path in glob_result.stdout.strip().split("\n"): + try: + content = await sandbox().read_file(file_path) + artifacts[file_path] = content + except Exception as e: + artifacts[file_path] = f"Error reading file: {str(e)}" + + explanation = "\n\n".join(combined_explanation) + metadata = { + "round_results": round_results, + "artifacts": artifacts if artifacts else None, + } + + if all_passed: + return Score( + value="C", + answer="PASS", + explanation=explanation, + metadata=metadata, + ) + else: + return Score( + value="I", + answer="FAIL", + explanation=explanation, + metadata=metadata, + ) + else: + # Single-round task: use existing logic + evaluate_script = (task_folder / "evaluate.sh").read_text() - await sandbox().write_file("evaluate.sh", evaluate_script) - await sandbox().exec(["chmod", "+x", "evaluate.sh"]) + await sandbox().write_file("evaluate.sh", evaluate_script) + await sandbox().exec(["chmod", "+x", "evaluate.sh"]) - result = await sandbox().exec(["bash", "evaluate.sh"]) + result = await sandbox().exec(["bash", "evaluate.sh"]) - artifacts = {} - artifact_patterns = state.metadata.get("artifacts", []) - if artifact_patterns: - for pattern in artifact_patterns: - glob_result = await sandbox().exec( - ["bash", "-c", f"ls {pattern} 2>/dev/null || true"] - ) - if glob_result.success and glob_result.stdout.strip(): - for file_path in glob_result.stdout.strip().split("\n"): - try: - content = await sandbox().read_file(file_path) - artifacts[file_path] = content - except Exception as e: - artifacts[file_path] = f"Error reading file: {str(e)}" + artifacts = {} + artifact_patterns = state.metadata.get("artifacts", []) + if artifact_patterns: + for pattern in artifact_patterns: + glob_result = await sandbox().exec( + ["bash", "-c", f"ls {pattern} 2>/dev/null || true"] + ) + if 
glob_result.success and glob_result.stdout.strip(): + for file_path in glob_result.stdout.strip().split("\n"): + try: + content = await sandbox().read_file(file_path) + artifacts[file_path] = content + except Exception as e: + artifacts[file_path] = f"Error reading file: {str(e)}" - if result.success: - return Score( - value="C", - answer="PASS", - explanation=result.stdout, - metadata={"artifacts": artifacts} if artifacts else None, - ) - else: - return Score( - value="I", - answer="FAIL", - explanation=result.stderr or result.stdout, - metadata={"artifacts": artifacts} if artifacts else None, - ) + if result.success: + return Score( + value="C", + answer="PASS", + explanation=result.stdout, + metadata={"artifacts": artifacts} if artifacts else None, + ) + else: + return Score( + value="I", + answer="FAIL", + explanation=result.stderr or result.stdout, + metadata={"artifacts": artifacts} if artifacts else None, + ) return score diff --git a/benchmarks/courselab_bench/courselab/solver.py b/benchmarks/courselab_bench/courselab/solver.py index b3062634..47280645 100644 --- a/benchmarks/courselab_bench/courselab/solver.py +++ b/benchmarks/courselab_bench/courselab/solver.py @@ -1,10 +1,11 @@ from pathlib import Path from textwrap import dedent from typing import Any -from inspect_ai.agent import Agent, agent, react +from inspect_ai.agent import Agent, agent, react, as_solver from inspect_ai.solver import Solver, TaskState, solver from inspect_ai.tool import bash from inspect_ai.util import sandbox +from inspect_ai.model import ChatMessageUser, ChatMessageAssistant @agent @@ -23,6 +24,48 @@ def lab_agent() -> Agent: ) +@solver +def multi_round_solver(base_agent: Agent) -> Solver: + """Solver that handles multi-round tasks with independent conversation contexts.""" + # Convert agent to solver + agent_solver = as_solver(base_agent) + + async def solve(state: TaskState, generate: Any) -> TaskState: + # Check if this is a multi-round task + if not 
state.metadata.get("multi_round", False): + # Not a multi-round task, use the base agent normally + return await agent_solver(state, generate) + + # Multi-round task: run each round with independent conversation context + num_rounds = state.metadata["num_rounds"] + round_tasks = state.metadata["round_tasks"] + + # Store original messages + original_messages = state.messages.copy() + + # Run each round with independent conversation context + for round_num in range(1, num_rounds + 1): + # Reset messages for this round (independent context) + # Note: state.input is read-only, so we only reset messages + state.messages = [ChatMessageUser(content=round_tasks[round_num])] + state.metadata["current_round"] = round_num + state.metadata["total_rounds"] = num_rounds + + # Run the agent for this round + state = await agent_solver(state, generate) + + # Store round results in metadata + if "round_results" not in state.metadata: + state.metadata["round_results"] = {} + state.metadata["round_results"][round_num] = { + "completed": True, + } + + return state + + return solve + + @solver def solution_agent() -> Solver: async def solve(state: TaskState, generate: Any) -> TaskState: diff --git a/benchmarks/courselab_bench/data/cmu_15-445/task_cpp/compose.yaml b/benchmarks/courselab_bench/data/cmu_15-445/task_cpp/compose.yaml new file mode 100644 index 00000000..b3e25073 --- /dev/null +++ b/benchmarks/courselab_bench/data/cmu_15-445/task_cpp/compose.yaml @@ -0,0 +1,7 @@ +services: + default: + image: ubuntu:22.04 + command: sleep infinity + working_dir: /workspace + x-init: + - preprocess.sh diff --git a/benchmarks/courselab_bench/data/cmu_15-445/task_cpp/config.json b/benchmarks/courselab_bench/data/cmu_15-445/task_cpp/config.json new file mode 100644 index 00000000..495ccab5 --- /dev/null +++ b/benchmarks/courselab_bench/data/cmu_15-445/task_cpp/config.json @@ -0,0 +1,15 @@ +{ + "instance_id": "cmu_15-445__count_min_sketch", + "course_id": "cmu_15-445", + "timeout_minutes": 30, + 
"tags": [ + "concurrency", + "cpp-programming", + "database-systems", + "algorithms" + ], + "artifacts": [ + "src/primer/count_min_sketch.cpp", + "src/primer/count_min_sketch.h" + ] +} diff --git a/benchmarks/courselab_bench/data/cmu_15-445/task_cpp/evaluate.sh b/benchmarks/courselab_bench/data/cmu_15-445/task_cpp/evaluate.sh new file mode 100644 index 00000000..f7abc111 --- /dev/null +++ b/benchmarks/courselab_bench/data/cmu_15-445/task_cpp/evaluate.sh @@ -0,0 +1,45 @@ +#!/bin/bash +set -e + +cd /workspace + +# Verify test file wasn't modified +echo "Verifying protected files were not modified" +if ! sha256sum -c /tmp/checksums/test.sha256 > /dev/null 2>&1; then + echo "FAIL: test/primer/count_min_sketch_test.cpp was modified" + exit 1 +fi +echo "Protected files unchanged" + +# Build +echo "" +echo "=== Building ===" +rm -rf build +mkdir build && cd build +cmake -DCMAKE_BUILD_TYPE=Debug .. > /dev/null 2>&1 +if ! make -j$(nproc); then + echo "FAIL: Build failed" + exit 1 +fi + +# Run tests +echo "" +echo "=== Running Tests ===" +make -j$(nproc) count_min_sketch_test > /dev/null 2>&1 +if ! ./test/count_min_sketch_test; then + echo "FAIL: Tests failed" + exit 1 +fi + +# Format check +echo "" +echo "=== Format Check ===" +make format > /dev/null 2>&1 +if ! 
make check-clang-tidy-p0; then + echo "FAIL: clang-tidy check failed" + exit 1 +fi + +echo "" +echo "PASS: All checks passed" +exit 0 diff --git a/benchmarks/courselab_bench/data/cmu_15-445/task_cpp/preprocess.sh b/benchmarks/courselab_bench/data/cmu_15-445/task_cpp/preprocess.sh new file mode 100644 index 00000000..de44a5f6 --- /dev/null +++ b/benchmarks/courselab_bench/data/cmu_15-445/task_cpp/preprocess.sh @@ -0,0 +1,36 @@ +#!/bin/bash +set -e + +echo "=== Setting up CMU 15-445 CountMinSketch Lab ===" + +cd /workspace + +echo "Installing git" +apt-get update > /dev/null 2>&1 +apt-get install -y git > /dev/null 2>&1 + +echo "Cloning bustub repository" +git clone https://github.com/cmu-db/bustub.git /tmp/bustub > /dev/null 2>&1 + +echo "Moving source to workspace" +mv /tmp/bustub/* ./ +mv /tmp/bustub/.clang-format ./ 2>/dev/null || true +mv /tmp/bustub/.clang-tidy ./ 2>/dev/null || true +rm -rf /tmp/bustub .git + +echo "Installing build dependencies" +build_support/packages.sh -y > /dev/null 2>&1 + +echo "Creating checksums for protected files" +mkdir -p /tmp/checksums +sha256sum test/primer/count_min_sketch_test.cpp > /tmp/checksums/test.sha256 + +echo "Building project" +mkdir -p build && cd build +cmake -DCMAKE_BUILD_TYPE=Debug .. > /dev/null 2>&1 +make -j$(nproc) > /dev/null 2>&1 + +echo "Setup complete" +echo "Agent should implement:" +echo " - src/include/primer/count_min_sketch.h" +echo " - src/primer/count_min_sketch.cpp" diff --git a/benchmarks/courselab_bench/data/cmu_15-445/task_cpp/sol.sh b/benchmarks/courselab_bench/data/cmu_15-445/task_cpp/sol.sh new file mode 100755 index 00000000..70a870c1 --- /dev/null +++ b/benchmarks/courselab_bench/data/cmu_15-445/task_cpp/sol.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# This script simulates what an agent might execute to solve the task + +cat > dummy.cpp << 'EOF' +#include + +int main() { + std::cout << "Hello, World!" 
<< std::endl; + return 0; +} +EOF \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/cmu_15-445/task_cpp/task.md b/benchmarks/courselab_bench/data/cmu_15-445/task_cpp/task.md new file mode 100644 index 00000000..961a5db8 --- /dev/null +++ b/benchmarks/courselab_bench/data/cmu_15-445/task_cpp/task.md @@ -0,0 +1,154 @@ +# The CMU 15-445 Database Lab + +**YOU ARE ONLY ALLOWED TO MODIFY OR ADD FILES IN THE src DIRECTORY.** + +## Task + +Implement a basic Count-min sketch data structure supporting insertion, count estimation, and merging. + +In `count_min_sketch.h`, following functions are to be implemented: + +- `CountMinSketch(width, depth)`: constructor, creates a count-min sketch with width columns (buckets) and depth rows (hash functions). +- `CountMinSketch(&&other)`: move constructor, transfers ownership of sketch resources from another instance. +- `operator=(&&other)`: move assignment, moves sketch resources from another instance to this one. +- `Insert(item)`: inserts the specified item into the count-min sketch. This is expected to be thread-safe. +- `Count(&item)`: returns the estimated frequency of the item. +- `Clear()`: resets the data structure from previous streams. +- `Merge(&other)`: creates a new sketch by combining counter values from two compatible sketches. +- `TopK(k, &candidates)`: practical usage of count-min sketch--given the candidates that have been stored in the count-min sketch, return the k candidates with the most estimated counts. + +## Project Specification + +Consider the following scenario: you are the administrator of a popular blog website, and you've been receiving reports on certain accounts spamming excessively. To map out overall network usage and detect potential DDoS attacks, you want a real-time approach to count how often each IP address appears in the incoming request stream. However, the stream is huge, which makes traditional data structures either too slow or too memory-hungry. 
This is where the Count–Min Sketch comes in! + +Count-min sketch (CM Sketch) is a probabilistic data structure that approximates frequency counts of items in a stream using sublinear memory. It maintains a compact 2-dimensional array of counters addressed by d independently seeded hash functions. Each update increments one cell per row, and a query returns the minimum of those counters. Moreover, count-min sketch is mergeable, meaning that the sum of two sketches is equivalent to constructing a single sketch over the concatenation of the corresponding input streams. Count-min sketch is widely used for network traffic monitoring, streaming analytics, and database system optimization. + +Count-min sketch is based on the following parameters: + +- `width (w)` – Number of columns in the hash matrix; each hash maps an item to an index in [0, w-1]. Larger w ⇒ smaller additive error. +- `depth (d)` – Number of rows / independent hash functions. Larger d ⇒ lower probability of a bad overestimate. +- `hash family / seeds` – A way to derive d pairwise-independent hash functions (e.g., by seeding a base hash differently for each row). + +To explain how this data structure functions, let follow the example at the beginning and consider the following input stream: `["24.156.99.202", "172.217.22.14", "64.104.78.227", "24.156.99.202"]`. Let's use a count-min sketch with width 4 and depth 3. For each of the 3 rows, hash the string (using that row's seed) to produce an integer, reduce it modulo 4 to get a column index, and increment the counter at (row, column). + +First, initialize the hash matrix (3 rows × 4 columns) with all zeros: + +``` +0 0 0 0 +0 0 0 0 +0 0 0 0 +``` + +Now process the stream. We first insert "24.156.99.202". 
Assume the following hash positions (mod 4): + +``` +hash1 → 2 +hash2 → 0 +hash3 → 3 +``` + +We update the cells accordingly: + +``` +0 0 1 0 +1 0 0 0 +0 0 0 1 +``` + +Next, we insert "172.217.22.14" and increment counters at the hash positions below: + +``` +hash1 → 1 +hash2 → 0 (collision with the first item in row 1) +hash3 → 2 +``` + +Update: + +``` +0 1 1 0 +2 0 0 0 +0 0 1 1 +``` + +Now we insert "64.104.78.227": + +``` +hash1 → 3 +hash2 → 1 +hash3 → 2 (collision with the second item in row 3) +``` + +The table becomes: + +``` +0 1 1 1 +2 1 0 0 +0 0 2 1 +``` + +Finally, repeat "24.156.99.202" (same hash positions as before: 2, 0, 3). Increment those cells again: + +``` +0 1 2 1 +3 1 0 0 +0 0 2 2 +``` + +Now, let's estimate the frequency of "24.156.99.202". Let's look up the same columns used when inserting that key: + +``` +Row 0, col 2 → 2 +Row 1, col 0 → 3 +Row 2, col 3 → 2 +``` + +The estimate is the minimum across rows: + +``` +Estimate("24.156.99.202") = min(2, 3, 2) = 2 +``` + +Why take the minimum? Each row's counter can be inflated by collisions with other items, but the minimum across independent hash rows gives the tightest upper bound on the true frequency. + +--- + +## Important Information + +For constructing hash functions for the matrix, please use the seeded hash function "common/util/hash_util.h" from the bustub repository. Please refrain from using hash functions from external libraries or implementing your own, since this might influence whether you pass the test suite we provide! + +The test suite includes parallel tests. However, we ONLY expect thread-safe implementation for `Insert(item)`. In other words, your implementation must correctly handle scenarios where multiple threads simultaneously perform insertions into multiple count-min sketches. + +You may notice the last test compares the performance of your implementation for `Insert(item)` against one that is strictly sequential. 
You could only pass this test if the relative speedup of your implementation is larger than 1.2. We expect you NOT to use only a single global latch to guard the whole data structure. If you do so, the contention ratio will be effectively around 1.0. There are many ways to do this. As a hint, try thinking of ways to break down the latch granularity or, even better, not to use a latch at all (you may find compare_exchange helpful if you aim for the latter). If you find this difficult to reason about, try passing other tests with a global latch first before attempting to optimize for this one. + +## Testing + +You can test the individual components of this assignment using our testing framework. We use GTest for unit test cases. You can disable tests in GTest by adding a `DISABLED_` prefix to the test name. To run the tests from the command-line: + +```bash +cd build +make -j$(nproc) count_min_sketch_test +./test/count_min_sketch_test +``` + +## Memory Leaks + +For this project, we use LLVM Address Sanitizer (ASAN) and Leak Sanitizer (LSAN) to check for memory errors. To enable ASAN and LSAN, configure CMake in debug mode and run tests as you normally would. If there is memory error, you will see a memory error report. Note that macOS only supports address sanitizer without leak sanitizer. + +In some cases, address sanitizer might affect the usability of the debugger. In this case, you might need to disable all sanitizers by configuring the CMake project with: + +```bash +cmake -DCMAKE_BUILD_TYPE=Debug -DBUSTUB_SANITIZER= .. +``` + +## Development Hints + +You can use `BUSTUB_ASSERT` for assertions in debug mode. Note that the statements within `BUSTUB_ASSERT` will NOT be executed in release mode. If you have something to assert in all cases, use `BUSTUB_ENSURE` instead. + +We will test your implementation in release mode. To compile your solution in release mode: + +```bash +mkdir build_rel && cd build_rel +cmake -DCMAKE_BUILD_TYPE=Release .. 
+make -j`nproc` +``` diff --git a/benchmarks/courselab_bench/data/cmu_15-445/task_database/compose.yaml b/benchmarks/courselab_bench/data/cmu_15-445/task_database/compose.yaml new file mode 100644 index 00000000..25079c78 --- /dev/null +++ b/benchmarks/courselab_bench/data/cmu_15-445/task_database/compose.yaml @@ -0,0 +1,9 @@ +services: + default: + image: ubuntu:22.04 + command: sleep infinity + working_dir: /workspace + x-init: + - preprocess.sh + + diff --git a/benchmarks/courselab_bench/data/cmu_15-445/task_database/config.json b/benchmarks/courselab_bench/data/cmu_15-445/task_database/config.json new file mode 100644 index 00000000..8c329afb --- /dev/null +++ b/benchmarks/courselab_bench/data/cmu_15-445/task_database/config.json @@ -0,0 +1,23 @@ +{ + "instance_id": "cmu_15-445__database_task", + "course_id": "cmu_15-445", + "timeout_minutes": 60, + "tags": [ + "multi-phase", + "cpp-programming", + "database-systems" + ], + "artifacts": [ + "src/buffer/arc_replacer.cpp", + "src/buffer/disk_scheduler.cpp", + "src/buffer/page_guard.cpp", + "src/buffer/buffer_pool_manager.cpp", + "src/storage/disk_manager.cpp", + "src/storage/page/page_guard.cpp", + "src/storage/page/page_guard.h", + "src/storage/page/page_id.h", + "src/storage/page/page_lock.h" + ] +} + + diff --git a/benchmarks/courselab_bench/data/cmu_15-445/task_database/evaluate.sh b/benchmarks/courselab_bench/data/cmu_15-445/task_database/evaluate.sh new file mode 100644 index 00000000..f6f6efc6 --- /dev/null +++ b/benchmarks/courselab_bench/data/cmu_15-445/task_database/evaluate.sh @@ -0,0 +1 @@ +# This is a dummy evaluate script to pass the CI test. 
Real evaluate script is in evaluate1.sh, evaluate2.sh, evaluate3.sh, evaluate4.sh \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/cmu_15-445/task_database/evaluate1.sh b/benchmarks/courselab_bench/data/cmu_15-445/task_database/evaluate1.sh new file mode 100644 index 00000000..dc7be8bd --- /dev/null +++ b/benchmarks/courselab_bench/data/cmu_15-445/task_database/evaluate1.sh @@ -0,0 +1,75 @@ +#!/bin/bash +set -e + +cd /workspace + +# Verify test files weren't modified +echo "Verifying protected test files were not modified" +if ! sha256sum -c /tmp/checksums/test1_arc_replacer.sha256 > /dev/null 2>&1; then + echo "FAIL: test/buffer/arc_replacer_test.cpp was modified" + exit 1 +fi +if ! sha256sum -c /tmp/checksums/test1_disk_scheduler.sha256 > /dev/null 2>&1; then + echo "FAIL: test/storage/disk_scheduler_test.cpp was modified" + exit 1 +fi +if ! sha256sum -c /tmp/checksums/test1_page_guard.sha256 > /dev/null 2>&1; then + echo "FAIL: test/buffer/page_guard_test.cpp was modified" + exit 1 +fi +if ! sha256sum -c /tmp/checksums/test1_buffer_pool_manager.sha256 > /dev/null 2>&1; then + echo "FAIL: test/buffer/buffer_pool_manager_test.cpp was modified" + exit 1 +fi +echo "Protected test files unchanged" + +# Build +echo "" +echo "=== Building ===" +rm -rf build +mkdir build && cd build +cmake -DCMAKE_BUILD_TYPE=Debug .. > /dev/null 2>&1 +if ! make -j$(nproc); then + echo "FAIL: Build failed" + exit 1 +fi + +# Run tests +echo "" +echo "=== Running Tests ===" +make -j$(nproc) arc_replacer_test > /dev/null 2>&1 +if ! ./test/arc_replacer_test; then + echo "FAIL: Tests failed" + exit 1 +fi + +make -j$(nproc) disk_scheduler_test > /dev/null 2>&1 +if ! ./test/disk_scheduler_test; then + echo "FAIL: Tests failed" + exit 1 +fi + +make -j$(nproc) page_guard_test > /dev/null 2>&1 +if ! ./test/page_guard_test; then + echo "FAIL: Tests failed" + exit 1 +fi + +make -j$(nproc) buffer_pool_manager_test > /dev/null 2>&1 +if ! 
./test/buffer_pool_manager_test; then + echo "FAIL: Tests failed" + exit 1 +fi + +# Format check +echo "" +echo "=== Format Check ===" +make format > /dev/null 2>&1 +if ! make check-clang-tidy-p0; then + echo "FAIL: clang-tidy check failed" + exit 1 +fi + +echo "" +echo "PASS: All checks passed" +exit 0 \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/cmu_15-445/task_database/evaluate2.sh b/benchmarks/courselab_bench/data/cmu_15-445/task_database/evaluate2.sh new file mode 100644 index 00000000..71c0ce05 --- /dev/null +++ b/benchmarks/courselab_bench/data/cmu_15-445/task_database/evaluate2.sh @@ -0,0 +1,75 @@ +#!/bin/bash +set -e + +cd /workspace + +# Verify test files weren't modified +echo "Verifying protected test files were not modified" +if ! sha256sum -c /tmp/checksums/test2_b_plus_tree_insert.sha256 > /dev/null 2>&1; then + echo "FAIL: test/storage/b_plus_tree_insert_test.cpp was modified" + exit 1 +fi +if ! sha256sum -c /tmp/checksums/test2_b_plus_tree_sequential_scale.sha256 > /dev/null 2>&1; then + echo "FAIL: test/storage/b_plus_tree_sequential_scale_test.cpp was modified" + exit 1 +fi +if ! sha256sum -c /tmp/checksums/test2_b_plus_tree_delete.sha256 > /dev/null 2>&1; then + echo "FAIL: test/storage/b_plus_tree_delete_test.cpp was modified" + exit 1 +fi +if ! sha256sum -c /tmp/checksums/test2_b_plus_tree_concurrent.sha256 > /dev/null 2>&1; then + echo "FAIL: test/storage/b_plus_tree_concurrent_test.cpp was modified" + exit 1 +fi +echo "Protected test files unchanged" + +# Build +echo "" +echo "=== Building ===" +rm -rf build +mkdir build && cd build +cmake -DCMAKE_BUILD_TYPE=Debug .. > /dev/null 2>&1 +if ! make -j$(nproc); then + echo "FAIL: Build failed" + exit 1 +fi + +# Run tests +echo "" +echo "=== Running Tests ===" +make -j$(nproc) b_plus_tree_insert_test > /dev/null 2>&1 +if ! 
./test/b_plus_tree_insert_test; then + echo "FAIL: b_plus_tree_insert_test failed" + exit 1 +fi + +make -j$(nproc) b_plus_tree_sequential_scale_test > /dev/null 2>&1 +if ! ./test/b_plus_tree_sequential_scale_test; then + echo "FAIL: b_plus_tree_sequential_scale_test failed" + exit 1 +fi + +make -j$(nproc) b_plus_tree_delete_test > /dev/null 2>&1 +if ! ./test/b_plus_tree_delete_test; then + echo "FAIL: b_plus_tree_delete_test failed" + exit 1 +fi + +make -j$(nproc) b_plus_tree_concurrent_test > /dev/null 2>&1 +if ! ./test/b_plus_tree_concurrent_test; then + echo "FAIL: b_plus_tree_concurrent_test failed" + exit 1 +fi + +# Format check +echo "" +echo "=== Format Check ===" +make format > /dev/null 2>&1 +if ! make check-clang-tidy-p2; then + echo "FAIL: clang-tidy check failed" + exit 1 +fi + +echo "" +echo "PASS: All checks passed" +exit 0 diff --git a/benchmarks/courselab_bench/data/cmu_15-445/task_database/evaluate3.sh b/benchmarks/courselab_bench/data/cmu_15-445/task_database/evaluate3.sh new file mode 100644 index 00000000..88d8ed12 --- /dev/null +++ b/benchmarks/courselab_bench/data/cmu_15-445/task_database/evaluate3.sh @@ -0,0 +1,46 @@ +#!/bin/bash +set -e + +cd /workspace + +# Verify test files weren't modified +echo "Verifying protected test files were not modified" +if ! sha256sum -c /tmp/checksums/test3_primer.sha256 > /dev/null 2>&1; then + echo "FAIL: test/sql/p3.00-primer.slt was modified" + exit 1 +fi +echo "Protected test files unchanged" + +# Build +echo "" +echo "=== Building ===" +rm -rf build +mkdir build && cd build +cmake -DCMAKE_BUILD_TYPE=Debug .. > /dev/null 2>&1 +if ! make -j$(nproc); then + echo "FAIL: Build failed" + exit 1 +fi + +# Run tests +echo "" +echo "=== Running Tests ===" +make -j$(nproc) sqllogictest > /dev/null 2>&1 +if ! ./bin/bustub-sqllogictest ../test/sql/p3.00-primer.slt --verbose; then + echo "FAIL: SQLLogicTest failed" + exit 1 +fi + +# Format check +echo "" +echo "=== Format Check ===" +make format > /dev/null 2>&1 +if ! 
make check-clang-tidy-p3; then + echo "FAIL: clang-tidy check failed" + exit 1 +fi + +echo "" +echo "PASS: All checks passed" +exit 0 + diff --git a/benchmarks/courselab_bench/data/cmu_15-445/task_database/evaluate4.sh b/benchmarks/courselab_bench/data/cmu_15-445/task_database/evaluate4.sh new file mode 100644 index 00000000..ea7ac9e9 --- /dev/null +++ b/benchmarks/courselab_bench/data/cmu_15-445/task_database/evaluate4.sh @@ -0,0 +1,66 @@ +#!/bin/bash +set -e + +cd /workspace + +# Verify test files weren't modified +echo "Verifying protected test files were not modified" +if ! sha256sum -c /tmp/checksums/test4_txn_timestamp.sha256 > /dev/null 2>&1; then + echo "FAIL: test/concurrency/txn_timestamp_test.cpp was modified" + exit 1 +fi +if ! sha256sum -c /tmp/checksums/test4_txn_scan.sha256 > /dev/null 2>&1; then + echo "FAIL: test/concurrency/txn_scan_test.cpp was modified" + exit 1 +fi +if ! sha256sum -c /tmp/checksums/test4_txn_executor.sha256 > /dev/null 2>&1; then + echo "FAIL: test/concurrency/txn_executor_test.cpp was modified" + exit 1 +fi +echo "Protected test files unchanged" + +# Build +echo "" +echo "=== Building ===" +rm -rf build +mkdir build && cd build +cmake -DCMAKE_BUILD_TYPE=Debug .. > /dev/null 2>&1 +if ! make -j$(nproc); then + echo "FAIL: Build failed" + exit 1 +fi + +# Run tests +echo "" +echo "=== Running Tests ===" +make -j$(nproc) txn_timestamp_test > /dev/null 2>&1 +if ! ./test/txn_timestamp_test; then + echo "FAIL: txn_timestamp_test failed" + exit 1 +fi + +make -j$(nproc) txn_scan_test > /dev/null 2>&1 +if ! ./test/txn_scan_test; then + echo "FAIL: txn_scan_test failed" + exit 1 +fi + +make -j$(nproc) txn_executor_test > /dev/null 2>&1 +if ! ./test/txn_executor_test; then + echo "FAIL: txn_executor_test failed" + exit 1 +fi + +# Format check +echo "" +echo "=== Format Check ===" +make format > /dev/null 2>&1 +if ! 
make check-clang-tidy-p4; then + echo "FAIL: clang-tidy check failed" + exit 1 +fi + +echo "" +echo "PASS: All checks passed" +exit 0 + diff --git a/benchmarks/courselab_bench/data/cmu_15-445/task_database/preprocess.sh b/benchmarks/courselab_bench/data/cmu_15-445/task_database/preprocess.sh new file mode 100644 index 00000000..fe46518d --- /dev/null +++ b/benchmarks/courselab_bench/data/cmu_15-445/task_database/preprocess.sh @@ -0,0 +1,49 @@ +#!/bin/bash +set -e + +echo "=== Setting up CMU 15-445 Database Lab ===" + +cd /workspace + +echo "Installing git" +apt-get update > /dev/null 2>&1 +apt-get install -y git > /dev/null 2>&1 + +echo "Cloning bustub repository" +git clone https://github.com/cmu-db/bustub.git /tmp/bustub > /dev/null 2>&1 + +echo "Moving source to workspace" +mv /tmp/bustub/* ./ +mv /tmp/bustub/.clang-format ./ 2>/dev/null || true +mv /tmp/bustub/.clang-tidy ./ 2>/dev/null || true +rm -rf /tmp/bustub .git + +echo "Installing build dependencies" +build_support/packages.sh -y > /dev/null 2>&1 + +echo "Creating checksums for protected test files" +mkdir -p /tmp/checksums +# Task 1 test files +sha256sum test/buffer/arc_replacer_test.cpp > /tmp/checksums/test1_arc_replacer.sha256 +sha256sum test/storage/disk_scheduler_test.cpp > /tmp/checksums/test1_disk_scheduler.sha256 +sha256sum test/buffer/page_guard_test.cpp > /tmp/checksums/test1_page_guard.sha256 +sha256sum test/buffer/buffer_pool_manager_test.cpp > /tmp/checksums/test1_buffer_pool_manager.sha256 +# Task 2 test files +sha256sum test/storage/b_plus_tree_insert_test.cpp > /tmp/checksums/test2_b_plus_tree_insert.sha256 +sha256sum test/storage/b_plus_tree_sequential_scale_test.cpp > /tmp/checksums/test2_b_plus_tree_sequential_scale.sha256 +sha256sum test/storage/b_plus_tree_delete_test.cpp > /tmp/checksums/test2_b_plus_tree_delete.sha256 +sha256sum test/storage/b_plus_tree_concurrent_test.cpp > /tmp/checksums/test2_b_plus_tree_concurrent.sha256 +# Task 3 test files +sha256sum 
test/sql/p3.00-primer.slt > /tmp/checksums/test3_primer.sha256 +# Task 4 test files +sha256sum test/concurrency/txn_timestamp_test.cpp > /tmp/checksums/test4_txn_timestamp.sha256 +sha256sum test/concurrency/txn_scan_test.cpp > /tmp/checksums/test4_txn_scan.sha256 +sha256sum test/concurrency/txn_executor_test.cpp > /tmp/checksums/test4_txn_executor.sha256 + +echo "Building project" +mkdir -p build && cd build +cmake -DCMAKE_BUILD_TYPE=Debug .. > /dev/null 2>&1 +make -j$(nproc) > /dev/null 2>&1 + +echo "Setup complete" + diff --git a/benchmarks/courselab_bench/data/cmu_15-445/task_database/sol.sh b/benchmarks/courselab_bench/data/cmu_15-445/task_database/sol.sh new file mode 100755 index 00000000..70a870c1 --- /dev/null +++ b/benchmarks/courselab_bench/data/cmu_15-445/task_database/sol.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# This script simulates what an agent might execute to solve the task + +cat > dummy.cpp << 'EOF' +#include + +int main() { + std::cout << "Hello, World!" << std::endl; + return 0; +} +EOF \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/cmu_15-445/task_database/task.md b/benchmarks/courselab_bench/data/cmu_15-445/task_database/task.md new file mode 100644 index 00000000..045301b6 --- /dev/null +++ b/benchmarks/courselab_bench/data/cmu_15-445/task_database/task.md @@ -0,0 +1 @@ +This is a dummy task to pass the CI test. Real tasks are in task 1-4. \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/cmu_15-445/task_database/task1.md b/benchmarks/courselab_bench/data/cmu_15-445/task_database/task1.md new file mode 100644 index 00000000..672bb15f --- /dev/null +++ b/benchmarks/courselab_bench/data/cmu_15-445/task_database/task1.md @@ -0,0 +1,230 @@ +# Phase 1 + +**YOU ARE ONLY ALLOWED TO MODIFY OR ADD FILES IN THE src DIRECTORY.** + +## Overview + +In this lab, you will build a disk-oriented database management system (DBMS) called **BusTub**. 
A disk-oriented architecture means that the DBMS's primary storage location is in persistent storage, like a hard drive (HDD) or flash storage (SSDs). This is different from an in-memory DBMS, where data is stored in volatile memory. + +The first programming project is to implement the DBMS's **buffer pool manager**. The buffer pool is responsible for moving physical pages of data back and forth from buffers in main memory to persistent storage. It also behaves as a cache, keeping frequently used pages in memory for faster access, and evicting unused or cold pages back out to storage. + +A page in BusTub is 8192 bytes (8 KB) of data, meaning the buffer pool manages data in 8 KB units. Since pages in BusTub are fixed size, the buffer pool manager stores these pages into fixed-size buffers called **frames**. The distinction between a page and a frame is somewhat subtle. A page is 8 KB of logical (virtual) data, and can be stored in memory, on disk, or both in memory and on disk. A frame, on the other hand, is a fixed-length 8 KB block of memory (i.e., a pointer to this memory) that stores a single page of data. The analogy here is storing (logical) pages inside (physical) fixed frames. + +In addition to behaving as a cache, the buffer pool manager allows a DBMS to support databases that are larger than the amount of memory available to the system. Consider a computer with 1 GB of memory (RAM). If we want to manage a 2 GB database, a buffer pool manager gives us the ability to interact with this database without needing to fit its entire contents in memory. + +The I/O operations that the buffer pool executes are abstracted away from other parts of the DBMS. For example, when one of the DBMS's components (e.g., execution engine) asks the buffer pool manager for a page of data using its unique identifier (`page_id_t`), that component does not need to know whether that page is already in memory or whether the system has to retrieve it from disk. 
Similarly, the buffer pool manager does not need to understand the contents of these pages, it only needs to know where the data is located. + +## Implementation + +Your implementation of the buffer pool must be thread-safe. Multiple threads will concurrently access the internal data structures of your buffer pool, and you must make sure that critical sections are protected with latches (these are called "locks" in operating systems). + +You must implement the following storage manager components: + +* **Adaptive Replacement Cache (ARC) Replacement Policy** +* **Disk Scheduler** +* **Buffer Pool Manager** + +## Project Specification + +Remember to pull latest code from the BusTub repository. + +For each of the following components, we have provided stub classes that contain the API that you must implement. You should not modify the signatures for the pre-defined functions in these classes. If you modify the signatures, our grading test code will not work and you will not get credit for this project. + +If a class already contains data members, you should not remove them. For example, the `BufferPoolManager` class contains `DiskScheduler` and `ArcReplacer` members that are required to implement functionality needed by the rest of the system. You may add data members and helper functions to these classes to correctly implement the required functionality. + +You may use any built-in C++17 containers in your project unless specified otherwise. It is up to you to decide which ones you want to use. Be warned that these containers are not thread-safe, and you will need to use latches to protect access to them. You may not use additional third-party libraries (e.g., Boost). + +### Task #1 - Adaptive Replacement Cache (ARC) Replacement Policy + +This component is responsible for tracking page usage in the buffer pool in order to determine candidate pages / frames to evict out of memory and back to disk. 
+ +You will implement a class called `ArcReplacer` in `src/include/buffer/arc_replacer.h` and its corresponding implementation file in `src/buffer/arc_replacer.cpp`. Note that `ArcReplacer` is a standalone class and is not related to any of the other `Replacer` classes. You are only expected to implement the ARC replacement policy, and you don't have to implement the LRU-K, LRU or Clock replacement policies (even though there are corresponding files for them). + +The ARC replacement policy, originally developed at IBM, is a replacement policy that adapts to the workload it observes. It involves two lists that track the cached pages, two lists that track the recently evicted pages, and a target size that is adaptive to the workload. Because of this adaptiveness, the ARC replacement policy generally performs better than LRU. Refer to the original paper for more details. + +You will be implementing a variant of the ARC replacement policy for this project. + +You will need to implement the following methods for ARC as defined in the header file (`src/include/buffer/arc_replacer.h`) and in the source file (`src/buffer/arc_replacer.cpp`): + +* **`Size() -> size_t`**: This method returns the number of evictable frames that are currently in the `ArcReplacer`. +* **`SetEvictable(frame_id_t frame_id, bool set_evictable)`**: This method controls whether a frame is evictable or not. It also controls the `ArcReplacer`'s size. You'll know when to call this function when you implement the `BufferPoolManager`. To be specific, when the pin count of a page hits 0, its corresponding frame should be marked as evictable. +* **`RecordAccess(frame_id_t frame_id, page_id_t page_id)`**: Record that the given page has been accessed at the current timestamp, in the given frame. This method should be called after a page has been pinned to a frame in the `BufferPoolManager`. The algorithm for this has been given below. 
+* **`Evict() -> std::optional<frame_id_t>`**: Evict a frame following the eviction process of the ARC algorithm. If there are no evictable frames, return `std::nullopt`. The algorithm for this has been given below. +* **`Remove(frame_id_t frame_id)`**: Remove a frame and its corresponding page from the replacer if it exists and is evictable. This method should be called only when a page is deleted in the `BufferPoolManager`. + +#### ARC Replacement Algorithm + +The ARC algorithm has the following parts. We start with two lists: the **MRU** (most recently used) list tracks the frames and their corresponding pages that were recently accessed exactly once, while the **MFU** (most frequently used) list tracks the frames and their corresponding pages that were recently accessed more than one time. We also start with two ghost lists: an **MRU ghost list** and an **MFU ghost list**. These lists track pages that are no longer in the buffer pool, but were recently evicted. Lastly, we also have a **target size** for the MRU list that adapts to the change of the workload, which starts at 0. Note that the actual MRU list size could be different from the target; it may be smaller or larger — this is just our target size. + +When working with the ARC replacer, there are generally five concepts here involving sizes, which are to be distinguished from each other: + +1. **The capacity of the replacer (`replacer_size_`)**: The maximum number of frames that the ArcReplacer supports is the same as the size of the buffer pool since it contains placeholders for all of the frames in the BufferPoolManager. +2. **The total size of the four lists**: Due to the tracking of the evicted pages in the ghost lists, although the capacity of the ArcReplacer is only the number of frames of the buffer pool, the four lists could have a total size up to 2 * capacity. +3. **The current evictable size (`curr_size_`)**: Not all frames in the replacer may be considered as evictable at any given time. 
The evictable size of the ArcReplacer is represented by the number of evictable frames. The ArcReplacer is first initialized to have no frames in it. Only when a frame is marked as evictable will the replacer's size increase. Similarly, when a frame is pinned or not in use, the replacer's size will decrease. +4. **The MRU list target size (`mru_target_size_`)**: The target size of the MRU list, which adapts to the workload observed. The adaptation algorithm is given below. +5. **The MRU list actual size (`mru_.size()`)**: The actual size of the MRU list, which could differ from the MRU target size. + +Also, please make sure you understand the relationship between frames and pages here, so that it makes sense to you why tracking page ids along with frame ids is needed: + +* A page that is in the buffer pool has a one-to-one mapping to a frame. +* Until a page in the buffer pool is evicted, the one-to-one mapping between page and frame should not change. +* An evicted page is not associated with any frames. + +When performing `RecordAccess` over a frame and its corresponding page, there are four cases, exactly one of which will happen: + +1. **Page already exists in MRU/MFU**: This is the case where the actual cache hits. Move the page to the front of MFU. +2. **Page already exists in MRU ghost**: This is the case where the actual cache misses but we hit on the ghost list. In this case we treat it as a pseudo-hit and adapt the target size. If the size of the MRU ghost list is greater than or equal to the size of the MFU ghost list, increase the MRU target size by one. Else increase it by MFU ghost size / MRU ghost size (rounded down). Do not increase the target size above `replacer_size`. Then move the page to the front of MFU. The rationale for this is that if the MRU list had been a little larger, the DBMS could have had a cache hit. +3. **Page already exists in MFU ghost**: Similar to the previous case, this is when the actual cache misses but we hit on the ghost list. 
If the size of the MFU ghost list is greater than or equal to the size of the MRU ghost list, decrease the MRU target size by 1. Else decrease the MRU target size by MRU ghost size / MFU ghost size (rounded down). Do not decrease the target size below 0. Then move the page to the front of MFU. The rationale for this is that if the MFU list had been a little larger, the DBMS could have had a cache hit. +4. **Page is not in the replacer**: This is the case where the actual cache misses and the ghost list misses. Then either of the following should happen. +* (a) If MRU size + MRU ghost size = replacer size: Kill the last element in the MRU ghost list, then add the page to the front of MRU. +* (b) Else MRU size + MRU ghost size should be smaller than replacer size (it should never be larger if you do things correctly). In this case: +* If MRU size + MRU ghost size + MFU size + MFU ghost size = 2 * replacer size: Kill the last element in the MFU ghost list, then add the page to the front of MRU. +* Else simply add the page to the front of the MRU. + +Try considering why, in cases 4(a) and 4(b), there must be items in the ghost lists. + +#### Implementation + +When you implement this algorithm, it is important to understand when a page should go to MRU, and when it should go to MFU. It also helps to think about why the given action is taken for each of the cases and what it's trying to do, rather than transpiling English into C++ code. If the MRU list size is smaller than the target size, we try to evict from the MFU list. If the MRU list size is greater than or equal to the target size, we try to evict from the MRU list. In either case, if eviction is not possible from the intended side (nothing is evictable in that list), try evicting from the other list. If still nothing is evictable, the eviction fails and returns `std::nullopt`. + +The implementation details are up to you. You are allowed to use built-in STL containers. 
You may assume that you will not run out of memory for these data structures (you cannot assume the same for the buffer pool in Task #3, you will run out of available frames). You must make sure that your implementation is thread-safe. + +You might notice there is a test that tests for the performance of your `RecordAccess` implementation. If your implementation fails / times out on the test, try think of what makes `RecordAccess` slow and how you could fix it. As a reminder, you will modify the data structures and member variables we provided you in the header file, but you can also add additional data structures to speed up operations. + +If you would like to read more about the ARC replacement algorithm, refer to [this paper](https://www.usenix.org/legacy/events/fast03/tech/full_papers/megiddo/megiddo.pdf). This project does not require you to implement the original algorithm exactly. You are also welcome to think about what we required you to do that is in addition to what the original algorithm could achieve. + + +### Task #2 - Disk Scheduler + +This component is responsible for scheduling read and write operations on the `DiskManager`. You will implement a class called `DiskScheduler` in `src/include/storage/disk/disk_scheduler.h` and its corresponding implementation file in `src/storage/disk/disk_scheduler.cpp`. + +The disk scheduler can be used by other components (in this case, your `BufferPoolManager` in Task #3) to queue disk requests, represented by a `DiskRequest` struct (already defined in `src/include/storage/disk/disk_scheduler.h`). The disk scheduler will maintain a background worker thread which is responsible for processing scheduled requests. + +The disk scheduler will utilize a shared queue (channel) to schedule and process the `DiskRequests`. One thread will add a request to the queue, and the disk scheduler's background worker will process the queued requests. 
We have provided a `Channel` class in `src/include/common/channel.h` to facilitate the thread-safe sharing of data between threads, but feel free to use your own implementation if you find it necessary. + +The `DiskScheduler` constructor and destructor are already implemented and are responsible for creating and joining the background worker thread. You will only need to implement the following methods as defined in the header file (`src/include/storage/disk/disk_scheduler.h`) and in the source file (`src/storage/disk/disk_scheduler.cpp`): + +* **`Schedule(std::vector &requests)`**: Schedules a vector of requests for the `DiskManager` to execute. The `DiskRequest` struct specifies whether the request is for a read or write, where the data should be read from / written into, and the page ID for the operation. The `DiskRequest` also includes a `std::promise` whose value should be set to true once the request is processed. See below for more information about `std::promise`. The implementation details are up to you, but you may wish to use a vector of requests as a way to pre-fetch data for the leaderboard challenges. +* **`StartWorkerThread()`**: The startup method for the background worker thread which processes the scheduled requests. The worker thread is created in the `DiskScheduler` constructor and calls this method. This worker thread is responsible for receiving queued requests and dispatching them to the `DiskManager`. Remember to set the value correctly on the `DiskRequest`'s callback to signal to the request issuer that the request has been completed. This should not return until the `DiskScheduler`'s destructor is called. + +We mentioned that one of the fields of a `DiskRequest` is a `std::promise`. If you are unfamiliar with C++ promises and futures, you can check out the documentation [here](https://en.cppreference.com/w/cpp/thread/promise). 
For the purposes of this project, they essentially provide a callback mechanism for a thread to know when their scheduled request is completed. To see an example of how they might be used, check out `disk_scheduler_test.cpp`. + +Again, the implementation details are up to you. You must make sure that your implementation is thread-safe. + +#### Disk Manager + +The header containing the `DiskManager` class is located at (`src/include/storage/disk/disk_manager.h`). It reads page data from disk and writes data to disk. Your disk scheduler will use `DiskManager::ReadPage()` and `DiskManager::WritePage()` while it is processing a read or write request. + + +### Task #3 - Buffer Pool Manager + +Finally, you must implement the buffer pool manager (**BufferPoolManager**)! Echoing the beginning of this page, the `BufferPoolManager` is responsible for fetching database pages from disk with the `DiskScheduler` and storing them in memory. The `BufferPoolManager` can also schedule writes of dirty pages out to disk when it is either explicitly instructed to do so or when it needs to evict a page to make space for a new page. + +Your `BufferPoolManager` implementation will use the `ArcReplacer` and `DiskScheduler` classes that you created in the previous steps of this assignment. The `ArcReplacer` will keep track of when pages are accessed so that it can decide which frame to evict when it must make room for a new page. The `DiskScheduler` will schedule writes and reads to disk on the `DiskManager`. + +We have provided a helper class called `FrameHeader`, which helps manage the in-memory frames. All access to page data should be through `FrameHeaders`. `FrameHeader` has a method called `GetData` that returns a raw pointer to its frame's memory, and the `DiskScheduler` / `DiskManager` will use this pointer to copy the contents of a physical page on disk into memory. + +As a reminder, the buffer pool manager does not need to understand the contents of these pages. 
The only information that the `BufferPoolManager` knows about pages are the page IDs (`page_id_t`) and the `FrameHeaders` they are stored inside of. Also, the `BufferPoolManager` will reuse the same `FrameHeader` object to store data as it moves back and forth between disk and memory. In other words, all `FrameHeaders` will store many different pages throughout the lifetime of the system. + +#### Concurrency + +When implementing a multi-threaded buffer pool manager, we must take care to synchronize data access. This means that we do not want multiple copies of the same page in different frames of the buffer pool. If we allowed this, we would encounter this scenario: + +1. Thread T1 loads page X1 from disk into a frame and starts modifying page X1, and let's call this new version page X2. +2. Thread T2 loads page X1 from disk into a different frame and starts modifying this version of page X1, and let's call this other modified version page X3. +3. Thread T2 finishes writing and writes X3 back to disk. +4. Thread T1 finishes writing and writes X2 back to disk. +5. Data race ☠️! + +Thus, we keep only 1 version of a page in memory at a time to prevent data synchronization races. Additionally, to prevent us from evicting a page while threads are accessing it, we maintain a reference count / pin count on the frame that stores it. Finally, in order to keep track of which pages are stored in which frames, we also maintain a page table using a hash map that maps page IDs to frames. + +The pin count of a frame is the number of threads that have access to the page's data. As long as the pin count on a frame is greater than 0 (implying there is at least 1 thread accessing the page's data), the buffer pool manager is not allowed to evict the page being stored. You can maintain the pin count using the atomic field `pin_count_` in the `FrameHeader` class. 
Keep in mind that `pin_count_` is separate from `ArcReplacer::SetEvictable`, so you will need to make sure those are synced properly. You will also have to update the `is_dirty_` flag of the `FrameHeader` when you think it is necessary. If this flag is set when you want to evict a page, you will have to act accordingly to maintain data synchronization between memory and disk. + +Lastly, you will have to implement both `ReadPageGuard` and `WritePageGuard`. These classes are RAII objects that provide thread-safe read / write access to the underlying pages. See the implementation section below for more information. You will probably need to implement this in tandem with the `BufferPoolManager` methods `CheckedReadPage` and `CheckedWritePage`. However, if you want to make sure your page guard implementations are correct, you may choose to implement `BufferPoolManager::GetPinCount` first and then stitch together something that will pass the page guard tests. + +#### Implementation + +You will need to implement the following page guard methods defined in the header file (`src/include/storage/page/page_guard.h`) and in the source file (`src/storage/page/page_guard.cpp`): + +* `ReadPageGuard::ReadPageGuard()` +* `ReadPageGuard::ReadPageGuard(ReadPageGuard &&that)` +* `ReadPageGuard::operator=(ReadPageGuard &&that) -> ReadPageGuard &` +* `ReadPageGuard::Flush()` +* `ReadPageGuard::Drop()` +* `WritePageGuard::WritePageGuard()` +* `WritePageGuard::WritePageGuard(WritePageGuard &&that)` +* `WritePageGuard::operator=(WritePageGuard &&that) -> WritePageGuard &` +* `WritePageGuard::Flush()` +* `WritePageGuard::Drop()` + +You do not have to implement these methods before the `BufferPoolManager` methods. You should probably work on them at the same time. + +These methods implement move semantics and RAII for the page guards. If you are unfamiliar with these things, please familiarize yourself with learning materials online. 
There are many great resources (including articles, Microsoft tutorials, YouTube videos) that explain this in depth. You should not attempt to implement these methods without having a solid understanding of how RAII and move semantics work. + +There will likely be a lot of code duplication here (i.e. the two guards should be identical except for a handful of lines). If you want to derive these classes based on a class you create, you are welcome to do so. Just make sure that no interfaces and method signatures are changed! + +You will also need to implement the following `BufferPoolManager` methods defined in the header file (`src/include/buffer/buffer_pool_manager.h`) and in the source file (`src/buffer/buffer_pool_manager.cpp`): + +* `NewPage() -> page_id_t` +* `DeletePage(page_id_t page_id) -> bool` +* `CheckedWritePage(page_id_t page_id) -> std::optional` +* `CheckedReadPage(page_id_t page_id) -> std::optional` +* `FlushPageUnsafe(page_id_t page_id) -> bool` +* `FlushPage(page_id_t page_id) -> bool` +* `FlushAllPagesUnsafe()` +* `FlushAllPages()` +* `GetPinCount(page_id_t page_id)` + +All of these methods have detailed documentation comments in the source file. Make sure to read all of these in their entirety because they contain many useful hints! + +You do not need to make your buffer pool manager super efficient. For all of the public `BufferPoolManager` method, holding the buffer pool latch from beginning to end should be enough (except for when you need to release it early to prevent deadlocks). However, you do need to ensure that your buffer pool manager has reasonable performance, otherwise there will be problems in future projects. You can compare your benchmark result (QPS.1 and QPS.2) with other students and see if your implementation is too slow. + +Please refer to the source files (`src/storage/page/page_guard.cpp` and `src/buffer/buffer_pool_manager.cpp`) for significantly more detailed specifications and documentation. 
+ +### Testing + +You can test the individual components of this assignment using our testing framework. We use GTest for unit test cases. There are four separate files that contain tests for each component: + +* **ArcReplacer:** `test/buffer/arc_replacer_test.cpp` +* **DiskScheduler:** `test/storage/disk_scheduler_test.cpp` +* **PageGuard:** `test/storage/page_guard_test.cpp` +* **BufferPoolManager:** `test/buffer/buffer_pool_manager_test.cpp` + +You can compile and run each test individually from the command-line: + +```bash +$ make arc_replacer_test -j `nproc` +$ ./test/arc_replacer_test + +``` + +### Formatting + +Your code must follow the Google C++ Style Guide. We use Clang to automatically check the quality of your source code. Your project grade will be zero if your submission fails any of these checks. + +Execute the following commands to check your syntax. The `format` target will automatically correct your code. The `check-lint` and `check-clang-tidy-p1` targets will print errors and instruct you on how to fix them to conform to our style guide. + +```bash +$ make format +$ make check-lint +$ make check-clang-tidy-p1 + +``` + +### Memory Leaks + +For this project, we use LLVM Address Sanitizer (ASAN) and Leak Sanitizer (LSAN) to check for memory errors. To enable ASAN and LSAN, configure CMake in debug mode and run tests as you normally would. If there is a memory error, you will see a memory error report. Note that macOS only supports address sanitizer without leak sanitizer. + +In some cases, address sanitizer might affect the usability of the debugger. In this case, you might need to disable all sanitizers by configuring the CMake project with: + +```bash +$ cmake -DCMAKE_BUILD_TYPE=Debug -DBUSTUB_SANITIZER= .. + +``` + +### Development Hints + +* You can use `BUSTUB_ASSERT` for assertions in debug mode. Note that the statements within `BUSTUB_ASSERT` will NOT be executed in release mode. 
If you have something to assert in all cases, use `BUSTUB_ENSURE` instead. +* Post all of your questions about this project on Piazza. Do not email the TAs directly with questions. +* We encourage you to use a graphical debugger to debug your project if you are having problems. +* If you are having compilation problems, running `make clean` does not completely reset the compilation process. You will need to delete your build directory and run `cmake ..` again before you rerun `make`. \ No newline at end of file diff --git a/benchmarks/courselab_bench/data/cmu_15-445/task_database/task2.md b/benchmarks/courselab_bench/data/cmu_15-445/task_database/task2.md new file mode 100644 index 00000000..f7d04dd5 --- /dev/null +++ b/benchmarks/courselab_bench/data/cmu_15-445/task_database/task2.md @@ -0,0 +1,198 @@ +# Phase 2 + +**YOU ARE ONLY ALLOWED TO MODIFY OR ADD FILES IN THE src DIRECTORY.** + +## Overview + +In this project you will implement a B+Tree index in your database system. A B+Tree is a balanced search tree in which the internal pages direct the search and leaf pages contain the actual data entries. The index provides fast data retrieval without needing to search every row in a database table, enabling rapid random lookups and efficient scans of ordered records. Your implementation will support thread-safe search, insertion, deletion (including splitting and merging nodes), and an iterator to support in-order leaf scans. You need to complete the following tasks: + +* **Task #1** - B+Tree Pages +* **Task #2** - B+Tree Operations (Insertion, Deletion, and Point Search) +* **Task #3** - Index Iterator +* **Task #4** - Concurrency Control + +--- + +## Project Specification + +We have provided stub classes that define the APIs that you must implement. You should not modify the signatures of these pre-defined functions; if you do, our test code will not work and you will receive little or no credit for the project. 
Similarly, you should not remove existing member variables from the code we provide. You may add functions and member variables to these classes to implement your solution. + +--- + +## Task #1 - B+Tree Pages + +You must implement the following three Page classes to store the data of your B+Tree. + +1. B+Tree Page +2. B+Tree Internal Page +3. B+Tree Leaf Page + +### Base Page + +This is a base class that the Internal Page and Leaf Page inherit from, and contains only information that both child classes share. The B+Tree Page has the following fields: + +| Variable Name | Size | Description | +| --- | --- | --- | +| `page_type_` | 4 | Page type (invalid page / leaf page / internal page) | +| `size_` | 4 | Number of key & value pairs in a page | +| `max_size_` | 4 | Max number of key & value pairs in a page | + +You must implement the B+Tree Page by modifying only its header file (`src/include/storage/page/b_plus_tree_page.h`) and the corresponding source file (`src/storage/page/b_plus_tree_page.cpp`). + +### Internal Page + +An Internal Page (i.e., inner node) stores ordered keys and child pointers (i.e. `page_id`s) to other B+Tree Pages. These keys and pointers are internally represented as an array of key/page_id pairs. As the number of child pointers is one more than the number of keys, the first key in `key_array_` (see `src/include/storage/page/b_plus_tree_internal_page.h`) is set to be invalid, and lookups should always start from the second key. + +At any time, each internal page should be at least half full. During deletion, two half-full pages can be merged, or keys and pointers can be redistributed to avoid merging. During insertion, one full page can be split into two, or keys and pointers can be redistributed to avoid splitting. These are examples of the many design choices that you will make while implementing your B+Tree. 
+ +You must implement the Internal Page by modifying only its header file (`src/include/storage/page/b_plus_tree_internal_page.h`) and the corresponding source file (`src/storage/page/b_plus_tree_internal_page.cpp`). + +### Leaf Page + +The Leaf Page stores ordered keys and their corresponding values. In your implementation, the value should always be the 64-bit record id for where the actual tuples are stored -- see the `RID` class, in `src/include/common/rid.h`. Leaf pages have the same restrictions on the number of key/value pairs as Internal pages, and should follow the same operations for merging, splitting, and redistributing keys. + +For this project, we will extend our leaf page implementation by also including a tombstone buffer for recent deletions. This tombstone buffer stores the last indexes of entries in key/value arrays that have been deleted. Thus, when a key is deleted from the index (if ) its entry in its corresponding leaf page is not actually deleted but the index is appended to the tombstone buffer. Only when the buffer of said leaf page has entries in it is the oldest buffered deletion actually applied to the key/value arrays. This is a simplified version of the Bε-tree discussed in the lectures. + +You must implement `GetTombstones()` to report the keys that the tombstones in a given page correspond to. `KeyAt` must however return the physical entry at a given index regardless of whether a tombstone exists for that entry. + +You must implement your Leaf Page by modifying only its header file (`src/include/storage/page/b_plus_tree_leaf_page.h`) and corresponding source file (`src/storage/page/b_plus_tree_leaf_page.cpp`). + +> **Note:** Even though Leaf Pages and Internal Pages contain the same key type, they may have different value types. Thus, the `max_size` can be different. + +Each B+Tree leaf/internal page corresponds to the content (i.e., the `data_` part) of a memory page fetched by the buffer pool. 
Every time you read or write from/to a leaf or internal page, you must first fetch the page from the buffer pool (using its unique `page_id`), use `reinterpret_cast` to convert it to either a leaf or an internal page, and unpin the page after reading or writing from/to it.
As with insertions, you must correctly update the B+Tree's root page ID if the root changes. + +We recommend that you use the page guard classes from Project #1 to avoid synchronization problems. You should use `ReadPage` or `WritePage` accordingly. + +You may optionally use the `Context` class (defined in `src/include/storage/index/b_plus_tree.h`) to track the pages that you've read or written (via the `read_set_` and `write_set_` fields) or to store other metadata that you need to pass into other functions recursively. + +**If you are using the Context class, here are some tips:** + +* You might only need to use `write_set_` when inserting or deleting. It is possible that you do not use `read_set_`, depending on your implementation. +* You might want to store the root page id in the context and acquire write guard of header page when modifying the B+Tree. +* To find a parent of the current node, look at the back of `write_set_`. It should contain all nodes along the access path. +* You may use `BUSTUB_ASSERT` to help you find inconsistent data in your implementation. For example, if you want to split a node (except root), you should ensure that there is still at least one node in the `write_set_`. If you need to split root, you should check if `header_page_` is `std::nullopt`. +* To unlock the header page, simply set `header_page_` to `std::nullopt`. To unlock other pages, pop from the `write_set_` and drop. + +The B+Tree is parameterized on arbitrary key, value, and key comparator types. We've defined a macro, `INDEX_TEMPLATE_ARGUMENTS`, that generates the template parameter declaration for you: + +```cpp +template + +``` + +The type parameters are: + +* **KeyType:** The type of each key in the index. In practice this will be a `GenericKey`. The actual size of a `GenericKey` varies, and is specified with its own template argument that depends on the type of indexed attribute. +* **ValueType:** The type of each value in the index. 
In practice, this will be a 64-bit RID. +* **KeyComparator:** A class used to compare whether two `KeyType` instances are less than, greater than, or equal to each other. These will be included in the `KeyType` implementation files. + +--- + +## Task #3 - Index Iterator + +After you have implemented and thoroughly tested your B+Tree in Tasks #1 and #2, you must add a C++ iterator that efficiently supports an in-order scan of the entries in the index. The basic idea is store sibling pointers so that you can efficiently traverse the leaf pages, and then implement an iterator that iterates through every key-value pair, in order, in the index. Note that this iterator must respect tombstones and thus you should skip any key-value pair with a corresponding tombstone. + +Your iterator must be a C++17-style Iterator, including at least the following methods: + +* `isEnd()`: Return whether this iterator is pointing at the last key/value pair. +* `operator++()`: Move to the next key/value pair. +* `operator*()`: Return the key/value pair this iterator is currently pointing at. +* `operator==()`: Return whether two iterators are equal. +* `operator!=()`: Return whether two iterators are not equal. + +Your `BPlusTree` also must correctly implement `begin()` and `end()` methods, to support C++ for-each loop functionality on the index. + +You must implement your index iterator by modifying only its header file (`src/include/storage/index/index_iterator.h`) and corresponding source file (`src/index/storage/index_iterator.cpp`). + +--- + +## Task #4 - Concurrency Control + +In the last task, you will modify your B+Tree implementation so that it safely supports concurrent operations. You should use the optimistic latch coupling/crabbing technique described in class and in the textbook. 
The thread traversing the index should acquire latches on B+Tree pages as necessary to ensure safe concurrent operations, and should release latches on parent pages as soon as possible when it is safe to do so. + +> **Note:** You should never acquire the same read latch twice in a single thread. It might lead to deadlock. + +## Instructions + +See the Project #0 instructions on how to create your private repository and setup your development environment. + +### Development Roadmap + +There are several ways in which you could go about building a B+Tree Index. This road map only serves as a rough conceptual guideline, which is based on the algorithm outlined in the textbook. + +1. **Simple Inserts:** Given a key-value pair KV and a non-full node N, insert KV into N. *Self check: What are the different types of nodes and can key-values be inserted in all of them?* +2. **Simple Search:** Given a key K, define a search mechanism on the tree to determine the presence of the key. *Self check: Can keys exist in multiple nodes and are all these keys the same?* +3. **Simple Splits:** Given a key K, and a target leaf node L that is full, insert the key into the tree, while keeping the tree consistent. *Self check: When do you choose to split a node and how to define a split?* +4. **Multiple Splits:** Define inserts for a key K on a leaf node L that is full, whose parent node M is also full. *Self check: What happens when the parent of M is also full?* +5. **Simple Deletes:** Given a key K and a target leaf node L that is at-least half full, delete K from L. *Self check: Is the leaf node L the only node that contains the key K?* +6. **Simple Coalesces:** Define deletion for a key K on a leaf node L that is less than half-full after the delete operation. *Self check: Is it mandatory to coalesce when L is less than half-full and how do you choose which node to coalesce with?* +7. 
**Not-So-Simple Coalesces:** Define deletion for a key K on a node L that contains no suitable node to coalesce with. *Self check: Does coalescing behavior vary depending on the type of nodes?* This should take you through to Task 1 and 2. +8. **Index Iterators:** The section on Task #3 describes the implementation of an iterator for the B+Tree. +9. **Concurrent Indexes:** The section on Task #4 describes the implementation of the latch crabbing technique to support concurrency in your design. + +### Requirements and Hints + +* You are not allowed to use a global latch to protect your data structure; your implementation must support a reasonable level of concurrency. In other words, you may not latch the whole index and only unlatch when operations are done. +* We recommend that you use the page guard classes `ReadPageGuard` and `WritePageGuard` to implement thread safety for your B+Tree. You can receive full credit on this project if you use these constructs correctly. +* You may add functions to your implementation as long as you keep all our original public interfaces intact for testing purposes. +* Do not use `malloc` or `new` to allocate large blocks of memory for your tree. If you need to need to create a new node for your tree or need a buffer for some operation, you should use the buffer pool manager. +* Use binary search to find the place to insert a value when iterating an internal or leaf node. Otherwise, your implementation will probably timeout on Gradescope. +* We recommend (but do not require) that you to follow this rule when implementing split: split a leaf node when the number of values reaches `max_size` after insertion, and split an internal node when number of values reaches `max_size` before insertion. + +### Common Pitfalls + +* We do not test your iterator for thread-safe leaf scans. 
A correct implementation, however, would require the Leaf Page to throw a `std::exception` when it cannot acquire a latch on its sibling to avoid potential dead-locks. +* If you implement a concurrent B+Tree index correctly, every thread will always acquire latches from the header page to the bottom. When you release latches, make sure you release them in the same order (from the header page to the bottom). +* When implementing the page classes (Task 1), make sure you only add class fields of trivially-constructed types (e.g. `int`). Do not add vectors and do not modify `key_array_` and `value_array_`. + +--- + +## Testing + +You can test your B+ Tree implementation locally using the following tests: + +* `test/storage/b_plus_tree_insert_test.cpp` +* `test/storage/b_plus_tree_sequential_scale_test.cpp` +* `test/storage/b_plus_tree_delete_test.cpp` +* `test/storage/b_plus_tree_concurrent_test.cpp` + +We strongly encourage you to write additional test cases for yourself to better understand your implementation. + +**Compile and run each test:** + +```bash +$ mkdir build +$ cd build +$ make b_plus_tree_insert_test -j$(nproc) +$ ./test/b_plus_tree_insert_test + +``` + +## Formatting + +Your code must follow the Google C++ Style Guide. + +```bash +$ make format +$ make check-lint +$ make check-clang-tidy-p2 + +``` + +## Memory Leaks + +We use LLVM Address Sanitizer (ASAN) and Leak Sanitizer (LSAN) to check for memory errors. Configure CMake in debug mode to enable them. diff --git a/benchmarks/courselab_bench/data/cmu_15-445/task_database/task3.md b/benchmarks/courselab_bench/data/cmu_15-445/task_database/task3.md new file mode 100644 index 00000000..a072df59 --- /dev/null +++ b/benchmarks/courselab_bench/data/cmu_15-445/task_database/task3.md @@ -0,0 +1,732 @@ +# Phase 3 + +**YOU ARE ONLY ALLOWED TO MODIFY OR ADD FILES IN THE src DIRECTORY.** + +## Overview + +In this project, you will implement the components that allow BusTub to execute queries. 
You will create the operator executors that execute SQL queries and implement optimizer rules to transform query plans. + +This project is composed of several tasks: + +* **Task #1: Access Method Executors** +* **Task #2: Aggregation and Join Executors** +* **Task #3: HashJoin Executor and Optimization** +* **Task #4: External Merge Sort + Limit Executors + Window Functions** +* **Optional Leaderboard Task** + +This project must be completed individually (i.e., no groups). Before starting, run `git pull public master` to pull the latest code from the public BusTub repo. + +--- + +## Background + +Please read this section carefully because you will need to construct your own SQL queries to test your executor implementation. You can also use the bustub shell to understand: + +* Use `EXPLAIN` or `EXPLAIN (o)` to show raw and optimized plans +* Understand parameters in plan nodes (i.e., what the first and second 0 means in `#0.0`) +* Read a few simple plan node implementations in `include/execution/plans/` +* Pay attention to various expression types in `include/execution/expressions/` + +### Introduction + +BusTub's architecture is as follows: + +![](https://15445.courses.cs.cmu.edu/fall2025/project3/img/project-structure.svg) + +In the public BusTub repository, we provide a full query processing layer. You can use the BusTub shell to execute SQL queries, much like in other database systems. Use the following command to compile and run the BusTub shell: + +```bash +cd build && make -j$(nproc) shell +./bin/bustub-shell + +``` + +You can also use **BusTub Web Shell** to run the examples below. It is a complete reference solution of the system running in your browser! + +Within the shell, you can use `\dt` to view all tables. By default, the BusTub shell will automatically create three tables that are pre-populated with data. This is provided as a convenience so that you do not need to load data every time you rebuild your solution. 
Changes to these tables will not be persisted when you restart the DBMS. + +```text +bustub> \dt ++-----+----------------+------------------------------+ +| oid | name | cols | ++-----+----------------+------------------------------+ +| 0 | __mock_table_1 | (colA:INTEGER, colB:INTEGER) | +| 1 | __mock_table_2 | (colC:VARCHAR, colD:VARCHAR) | +| 2 | __mock_table_3 | (colE:INTEGER, colF:VARCHAR) | +| ... | ... | ... | ++-----+----------------+------------------------------+ + +``` + +You can view all data from a table by using the `SELECT` statement: + +```text +bustub> SELECT * FROM __mock_table_1; ++---------------------+---------------------+ +| __mock_table_1.colA | __mock_table_1.colB | ++---------------------+---------------------+ +| 0 | 0 | +| 1 | 100 | +| 2 | 200 | +| 3 | 300 | +| 4 | 400 | +| 5 | 500 | +| ... | ... | ++---------------------+---------------------+ + +``` + +**Please note:** + +* BusTub only supports a small subset of SQL. Don't be surprised if it does not work with some SQL queries. For all SQL queries supported in BusTub, refer to the SQLLogicTest files in `tests/sql`. +* If you are using CLion to run the BusTub shell, please add a `--disable-tty` parameter to the shell, so that it works correctly in the CLion terminal. +* Always end your statement with `;` (except internal commands). +* BusTub only supports `INT` and `VARCHAR(n)` type. Also you should use single quotes for strings, e.g., `INSERT INTO table VALUES ('a')`. + +### Inspecting SQL Query Plans + +BusTub supports the `EXPLAIN` command to print a query's execution plan. You can add `EXPLAIN` in front of any query. 
For example: + +```text +bustub> EXPLAIN SELECT * FROM __mock_table_1; +=== BINDER === +BoundSelect { + table=BoundBaseTableRef { table=__mock_table_1, oid=0 }, + columns=[__mock_table_1.colA, __mock_table_1.colB], + groupBy=[], + having=, + where=, + limit=, + offset=, + order_by=[], + is_distinct=false, +} +=== PLANNER === +Projection { exprs=[#0.0, #0.1] } | (__mock_table_1.colA:INTEGER, __mock_table_1.colB:INTEGER) +MockScan { table=__mock_table_1 } | (__mock_table_1.colA:INTEGER, __mock_table_1.colB:INTEGER) +=== OPTIMIZER === +MockScan { table=__mock_table_1 } | (__mock_table_1.colA:INTEGER, __mock_table_1.colB:INTEGER) + +``` + +The result of `EXPLAIN` provides an overview of the transformation process within the query processing layer. The statement is first processed by the parser and the binder, which produces an abstract syntax tree (AST) representing the query. In this example, the query is represented by a `BoundSelect` on `__mock_table_1` that will retrieve two columns (`colA` and `colB`). Note that the binder automatically expands the `*` character from the original SQL query into the actual columns in the table. + +Next, the binder AST is processed by the planner, which will produce an appropriate query plan. In this case, the query plan is a tree of two nodes, with data flowing from the leaves to the root: + +![](https://15445.courses.cs.cmu.edu/fall2025/project3/img/mock_scan.svg) + +After that, the optimizer will optimize the query plan. In this case, it removes the projection because it is redundant. 
+ +Let's consider a more complex example: + +```text +bustub> EXPLAIN (o) SELECT colA, MAX(colB) FROM + (SELECT * FROM __mock_table_1, __mock_table_3 WHERE colA = colE) GROUP BY colA; +=== OPTIMIZER === +Agg { types=[max], aggregates=[#0.1], group_by=[#0.0] } + NestedLoopJoin { type=Inner, predicate=(#0.0=#1.0) } + MockScan { table=__mock_table_1 } + MockScan { table=__mock_table_3 } + +``` + +For this example, the optimized query plan is: + +![](https://15445.courses.cs.cmu.edu/fall2025/project3/img/more_complex_example.svg) + +In this project, you will need to construct SQL queries to test each of your executor's implementations. `EXPLAIN` is extremely helpful for you to know if a SQL query is using a specific executor. + +--- + +## Sample Executors + +In the BusTub public repository, we provide several sample executor implementations. + +### Projection + +A projection node can represent various computations on its input. It will always have exactly one child node. In the BusTub shell, inspect the query plans for the following queries: + +* `EXPLAIN SELECT 1 + 2;` +* `EXPLAIN SELECT colA FROM __mock_table_1;` +* `EXPLAIN SELECT colA + colB AS a, 1 + 2 AS b FROM __mock_table_1;` + +A projection plan node consists of one or more expressions representing a computation: + +* **ColumnValueExpression**: directly places a column of the child executor to the output. The syntax `#0.0` means the first column in the first child. You will see something like `#0.0 = #1.0` in a plan for joins. +* **ConstantExpression**: represents a constant value (e.g., 1). +* **ArithmeticExpression**: a tree representing an arithmetic computation. For example, `1 + 2` would be represented by an `ArithmeticExpression` with two `ConstantExpression` (1 and 2) as children. + +### Filter + +A filter plan node is used to filter the output of a child given a predicate. For example: +`EXPLAIN SELECT * FROM __mock_table_1 WHERE colA > 1;` +A filter node has exactly one child and contains a predicate. 
+ +### Values + +A values plan node is used to directly produce values: + +* `EXPLAIN values (1, 2, 'a'), (3, 4, 'b');` +* `CREATE TABLE table1(v1 INT, v2 INT, v3 VARCHAR(128));` +* `EXPLAIN INSERT INTO table1 VALUES (1, 2, 'a'), (3, 4, 'b');` +Values plan nodes are useful when inserting user-supplied values into a table. + +### Query Plan Syntax + +As you might have noticed, `EXPLAIN` produces a string of column descriptions after each plan node. That's the output schema of the node. Consider this example output: +`Projection { exprs=[#0.0, #0.1] } | (__mock_table_1.colA:INTEGER, __mock_table_1.colB:INTEGER)` +This indicates that the executor representing this plan node will produce two columns, both of integer types. The output schema is inferred within the planner. For this project, your executor implementations must produce tuples with schema exactly as specified in the plan node, or they will fail our unit tests. + +--- + +## Project Specification + +In this project, you will add new operator executors and query optimizations to BusTub. BusTub uses the row-based vectorization query processing model, in which every executor implements a `Next` function to get the next tuple batch result of max `BUSTUB_BATCH_SIZE` tuples. When the DBMS invokes an executor's `Next` function, the executor returns either (1) a batch of some tuples or (2) an indicator that there are no more tuples. With this approach, each executor implements a loop that continues calling `Next` on its children to retrieve tuples and process them batch by batch. + +In BusTub's implementation of the vectorization model, the `Next` function for each executor returns a batch of record identifiers (RID) in addition to a batch of tuples. A record identifier serves as a unique identifier for a tuple. + +The executors are created from an execution plan in `src/execution/executor_factory.cpp`. + +All test cases in this project are written in a special file format called SQLLogicTest (derived from SQLite). 
You can find how to use it at the end of this page. + +--- + +## Task #1 - Access Method Executors + +In the background section above, we saw that the BusTub can already retrieve data from mock tables in `SELECT` queries. This is implemented without real tables by using a `MockScan` executor to always generate the same tuples using a predefined algorithm. This is why you cannot update these tables. + +In this task, you will implement executors that read from and write to tables in the storage system. You will complete your implementation in the following files: + +* `src/include/execution/executors/seq_scan_executor.h` +* `src/execution/seq_scan_executor.cpp` +* `src/include/execution/executors/insert_executor.h` +* `src/execution/insert_executor.cpp` +* `src/include/execution/executors/update_executor.h` +* `src/execution/update_executor.cpp` +* `src/include/execution/executors/delete_executor.h` +* `src/execution/delete_executor.cpp` +* `src/include/execution/executors/index_scan_executor.h` +* `src/execution/index_scan_executor.cpp` +* `src/optimizer/seqscan_as_indexscan.cpp` + +Each of these executors is described below. + +### SeqScan + +The `SeqScanPlanNode` can be planned with a `SELECT * FROM table` statement. + +```text +bustub> CREATE TABLE t1(v1 INT, v2 VARCHAR(100)); +Table created with id = 15 +bustub> EXPLAIN (o,s) SELECT * FROM t1; +=== OPTIMIZER === +SeqScan { table=t1 } | (t1.v1:INTEGER, t1.v2:VARCHAR) + +``` + +The `SeqScanExecutor` iterates over a table and returns its tuples one batch at a time. + +**Hint:** Make sure that you understand the difference between the pre-increment and post-increment operators when using the `TableIterator` object. (Check [here](https://en.cppreference.com/w/cpp/language/operator_incdec) for a quick refresher.) + +**Hint:** Do not emit tuples that are deleted in the `TableHeap`. Check the `is_deleted_` field of the corresponding `TupleMeta` for each tuple. 
+ +**Hint:** The output of sequential scan is a copy of each matched tuple and its original record identifier (RID). + +**Note:** BusTub does not support `DROP TABLE` or `DROP INDEX`. You can reset your database by restarting the shell. + +### Insert + +The `InsertPlanNode` can be planned with an `INSERT` statement. Note that you will need to use a single quote to specify a `VARCHAR` value. + +```text +bustub> EXPLAIN (o,s) INSERT INTO t1 VALUES (1, 'a'), (2, 'b'); +=== OPTIMIZER === +Insert { table_oid=15 } | (__bustub_internal.insert_rows:INTEGER) + Values { rows=2 } | (__values#0.0:INTEGER, __values#0.1:VARCHAR) + +``` + +The `InsertExecutor` inserts tuples into a table and updates any affected indexes. It has exactly one child producing values to be inserted into the table. The planner will ensure that the values have the same schema as the table. The executor will produce a single tuple of integer type as the output, indicating how many rows have been inserted into the table. Remember to update indexes when inserting into the table, if there are indexes associated with it. + +**Hint:** See the **System Catalog** section below for information about the system catalog. To initialize this executor, you will need to look up information about the table being inserted into. + +**Hint:** See the **Index Updates** section below for further details about updating a table's indexes. + +**Hint:** You will need to use the `TableHeap` class to perform table modifications. + +### Update + +The `UpdatePlanNode` can be planned with an `UPDATE` statement. It has exactly one child with the records to be updated in the table. 
+ +```text +bustub> explain (o,s) update test_1 set colB = 15445; +=== OPTIMIZER === +Update { table_oid=20, target_exprs=[#0.0, 15445, #0.2, #0.3] } | (__bustub_internal.update_rows:INTEGER) + SeqScan { table=test_1 } | (test_1.colA:INTEGER, test_1.colB:INTEGER, test_1.colC:INTEGER, test_1.colD:INTEGER) + +``` + +The `UpdateExecutor` modifies existing tuples in a specified table. The executor will produce a single tuple of integer type as the output, indicating how many rows have been updated. Remember to update any indexes affected by the updates. + +**Hint:** To implement an update, first delete the affected tuple and then insert a new tuple. + +### Delete + +The `DeletePlanNode` can be planned with a `DELETE` statement. It has exactly one child with the records to be deleted from the table. Your delete executor should produce an integer output that represents the number of rows that it deleted from the table. It will also need to update any affected indexes. + +```text +bustub> EXPLAIN (o,s) DELETE FROM t1; +=== OPTIMIZER === +Delete { table_oid=15 } | (__bustub_internal.delete_rows:INTEGER) + Filter { predicate=true } | (t1.v1:INTEGER, t1.v2:VARCHAR) + SeqScan { table=t1 } | (t1.v1:INTEGER, t1.v2:VARCHAR) + +bustub> EXPLAIN (o,s) DELETE FROM t1 where v1 = 1; +=== OPTIMIZER === +Delete { table_oid=15 } | (__bustub_internal.delete_rows:INTEGER) + Filter { predicate=#0.0=1 } | (t1.v1:INTEGER, t1.v2:VARCHAR) + SeqScan { table=t1 } | (t1.v1:INTEGER, t1.v2:VARCHAR) + +``` + +You may assume that the `DeleteExecutor` is always at the root of the query plan in which it appears. The `DeleteExecutor` should not modify its result set. + +**Hint:** To delete a tuple, you need to get a RID from the child executor and update the `is_deleted_` field of the corresponding `TupleMeta` for that tuple. + +### IndexScan + +The `IndexScanExecutor` does point lookup and ordered scan using the b-plus tree index to retrieve tuples in the table. 
The executor should be able to support several point lookups on the same index. + +You will need to implement the index scan by supporting the following two types of queries: + +1. **Point Lookup**: `SELECT FROM WHERE = `. You will implement the optimizer rule to transform a `SeqScan` into an `IndexScan` in the next section. +2. **Ordered Scan**: `SELECT FROM
ORDER BY `. The optimizer rule to generate an `IndexScan` for queries with `ORDER BY` on an index column has been implemented for you. Your task is to handle ASC (or default) ordering only in `IndexScan`. + +```text +bustub> CREATE TABLE t1(v1 int, v2 int); +Table created with id = 22 + +bustub> CREATE INDEX t1v1 ON t1(v1); +Index created with id = 0 + +bustub> EXPLAIN (o,s) SELECT * FROM t1 WHERE v1 = 1; +=== OPTIMIZER === +IndexScan { index_oid=0, filter=(#0.0=1) } | (t1.v1:INTEGER, t1.v2:INTEGER) + +bustub> EXPLAIN (o,s) SELECT * FROM t1 ORDER BY v1; +=== OPTIMIZER === +IndexScan { index_oid=0 } | (t1.v1:INTEGER, t1.v2:INTEGER) + +``` + +The type of the index object in the plan will always be `BPlusTreeIndexForTwoIntegerColumn` in this project. You can safely cast the object to this type and store it in the executor wherever needed: +`tree_ = dynamic_cast(index_info_->index_.get())` + +You can then do point lookup or ordered scan with the b-plus tree index and emit the satisfying tuple. In this semester, you only need to support the index on a single, unique integer column. Our test cases will not contain duplicate keys. Hence, this executor returns one tuple per point lookup if it exists. + +You will need to finish the optimizer rule in the next section to transform a `SeqScan` into an `IndexScan`. It may make more sense to implement the optimizer rule before implementing `IndexScan` to understand the kind of queries `IndexScanExecutor` will need to support. + +**Hint:** We will never insert duplicate rows into tables with indexes. +**Hint:** As above, do not emit tuples that are deleted. +**Hint:** Please use `ScanKey` for the point lookup, and use Index Iterator for the ordered scan. + +### Optimizing SeqScan to IndexScan + +As we learned in lecture, when we are querying on the indexed column, using an `IndexScan` will significantly boost the lookup performance. 
To this end, we need to push down the filter into the scanner so that we know the key to lookup in the index. Then we can directly retrieve the value over the index, instead of doing a full table scan. + +You would need to modify the optimizer to transform a `SeqScanPlanNode` into a `IndexScanPlanNode` when it is possible. + +Consider the following example: +`bustub> EXPLAIN (o) SELECT * FROM t1 WHERE v1 = 1;` +Without applying the `MergeFilterScan` and the `SeqScan as IndexScan` optimizer rule, the plan may look like the following: + +```text + Filter { predicate=(#0.0=1) } | (t1.v1:INTEGER, t1.v2:INTEGER, t1.v3:INTEGER) + SeqScan { table=t1 } | (t1.v1:INTEGER, t1.v2:INTEGER, t1.v3:INTEGER) + +``` + +After applying the `MergeFilterScan` and `SeqScan as IndexScan` optimizer rule, we can just do a quick index lookup instead of iterating the entire table. The resulting plan will look like the following: + +```text + IndexScan { index_oid=0, filter=(#0.0=1) } | (t1.v1:INTEGER, t1.v2:INTEGER, t1.v3:INTEGER) + +``` + +Here's the brief steps to implement this optimizer rule: + +* **Enable Predicate pushdown to SeqScan**: We can implement a predicate filter in `SeqScanExecutor` so that later the index scan node will have the predicate. We've already enabled `MergeFilterScan` optimizer rule `src/optimizer/merge_filter_scan.cpp` in the starter optimizer rules for you. +* **Use Index**: You can check the filtering columns from the predicate. If there happens to exist an index on this column, create an `IndexScanPlanNode`. Note that to get full score, you will need to support this optimizer rule in a few different situations: (1) when there's one equality test on the indexed column in predicate (i.e., `WHERE v1 = 1`) (2) when the indexed column ordering is flipped (i.e., `WHERE 1 = v1`) (3) when there are several point lookups on the same index (i.e., `WHERE v1 = 1 or v1 = 4`). 
Note that queries of the form `SELECT * FROM t1 WHERE v1 = 1 AND v2 = 2` should still use a seq scan, thus you do not need to split the predicates. +
+Please check the **Optimizer Rule Implementation Guide** section for details on implementing an optimizer rule.
+
+Now that you have implemented all storage-related executors, in the following tasks you can create tables and insert some values by yourself to test your own executor implementation! At this point, you should also have passed SQLLogicTests #1 to #6.
+
+**Hint:** You may find the utility `BPlusTreeIndex::ScanKey` function helpful.
+**Hint:** Think about how to handle queries of the form `WHERE v1 = 1 OR v1 = 1`. It may help to view `AggregateKey` in `/src/include/execution/plans/aggregation_plan.h`.
+**Hint:** Please only optimize `SeqScan` to `IndexScan` in the 2 scenarios mentioned above (i.e. point lookup and ordered scan).
+
+---
+
+## Task #2 - Aggregation & Join Executors
+
+You will complete your implementation in the following files:
+
+* `src/include/execution/plans/aggregation_plan.h`
+* `src/include/execution/executors/aggregation_executor.h`
+* `src/execution/aggregation_executor.cpp`
+* `src/include/execution/executors/nested_loop_join_executor.h`
+* `src/execution/nested_loop_join_executor.cpp`
+* `src/include/execution/executors/nested_index_join_executor.h`
+* `src/execution/nested_index_join_executor.cpp`
+
+### Aggregation
+
+The `AggregationPlanNode` is used to support queries like the following:
+
+* `EXPLAIN SELECT colA, MIN(colB) FROM __mock_table_1 GROUP BY colA;`
+* `EXPLAIN SELECT COUNT(colA), min(colB) FROM __mock_table_1;`
+* `EXPLAIN SELECT colA, MIN(colB) FROM __mock_table_1 GROUP BY colA HAVING MAX(colB) > 10;`
+* `EXPLAIN SELECT DISTINCT colA, colB FROM __mock_table_1;`
+
+The aggregation executor computes an aggregation function for each group of input. It has exactly one child. The output schema consists of the group-by columns followed by the aggregation columns. 
+ +As discussed in class, a common strategy for implementing aggregation is to use a hash table, with the group-by columns as the key. In this project, you may assume that the aggregation hash table fits in memory. This means that you do not need to implement a multi-stage, partition-based strategy, and the hash table does not need to be backed by buffer pool pages. + +We provide a `SimpleAggregationHashTable` data structure that exposes an in-memory hash table (`std::unordered_map`) but with an interface designed for computing aggregations. This class also exposes an `SimpleAggregationHashTable::Iterator` type that can be used to iterate through the hash table. You will need to complete the `CombineAggregateValues` function for this class. + +The aggregation executor itself will not need to handle the `HAVING` predicate. The planner will plan aggregations with a `HAVING` clause as an `AggregationPlanNode` followed by a `FilterPlanNode`. + +**Hint:** In the context of a query plan, aggregations are pipeline breakers. This may influence the way that you use the `AggregationExecutor::Init()` and `AggregationExecutor::Next()` functions in your implementation. Carefully decide whether the build phase of the aggregation should be performed in `AggregationExecutor::Init()` or `AggregationExecutor::Next()`. +**Hint:** You must handle `NULL` values in the input of the aggregation functions. See test cases for expected behavior. +**Hint:** Group-by columns can also have `NULL` values. You may want to consider modifying the way aggregate keys are compared. +**Hint:** When performing aggregation on an empty table, `CountStarAggregate` should return zero and all other aggregate types should return `integer_null`. + +### NestedLoopJoin + +The DBMS will use `NestedLoopJoinPlanNode` for all join operations, by default. 
+ +* `EXPLAIN SELECT * FROM __mock_table_1, __mock_table_3 WHERE colA = colE;` +* `EXPLAIN SELECT * FROM __mock_table_1 INNER JOIN __mock_table_3 ON colA = colE;` +* `EXPLAIN SELECT * FROM __mock_table_1 LEFT OUTER JOIN __mock_table_3 ON colA = colE;` + +You will need to implement an inner join and left join for the `NestedLoopJoinExecutor` using the simple nested loop join algorithm from class. The output schema of this operator is all columns from the left table followed by all columns from the right table. For each tuple in the outer table, consider each tuple in the inner table and emit the ones that satisfy the join predicate. + +**Hint:** You should use the predicate in the `NestedLoopJoinPlanNode`. See `AbstractExpression::EvaluateJoin`. Note that this returns a `Value`, which could be false, true, or NULL. + +### NestedIndexJoin + +The DBMS will use `NestedIndexJoinPlanNode` if the query contains a join with an equi-condition and the right side of the join has an index over the condition. + +```text +CREATE TABLE t1(v1 int, v2 int); +CREATE TABLE t2(v3 int, v4 int); +CREATE INDEX t2v3 on t2(v3); +EXPLAIN SELECT * FROM t1 INNER JOIN t2 ON v1 = v3; +=== PLANNER === +Projection { exprs=[#0.0, #0.1, #0.2, #0.3] } | (t1.v1:INTEGER, t1.v2:INTEGER, t2.v3:INTEGER, t2.v4:INTEGER) + NestedLoopJoin { predicate=#0.0=#1.0 } | (t1.v1:INTEGER, t1.v2:INTEGER, t2.v3:INTEGER, t2.v4:INTEGER) + SeqScan { table=t1 } | (t1.v1:INTEGER, t1.v2:INTEGER) + SeqScan { table=t2 } | (t2.v3:INTEGER, t2.v4:INTEGER) +=== OPTIMIZER === +NestedIndexJoin { type=Inner, key_predicate=#0.0, index=t2v3, index_table=t2 } | (t1.v1:INTEGER, t1.v2:INTEGER, t2.v3:INTEGER, t2.v4:INTEGER) + SeqScan { table=t1 } | (t1.v1:INTEGER, t1.v2:INTEGER) + +``` + +In the plan phase, the query is planned as a `NestedLoopJoin` of two tables. The optimizer identifies that the right side of the join (`SeqScan t2`) has an index on column `v3`, and the join condition is an equi-condition `v1 = v3`. 
+ +The schema of `NestedIndexJoin` is all columns from the left table (child, outer) and then from the right table (index, inner). This executor will have only one child that propagates tuple batches corresponding to the outer table of the join. For each of these tuples, you will need to find the corresponding tuple in the inner table that matches the index key given by utilizing the index in the catalog. + +**Hint:** You will want to fetch the tuple from the outer table, construct the index probe key by using `key_predicate`, and then look up the RID in the index to retrieve the corresponding tuple for the inner table. + +We will provide all test cases on Gradescope AS-IS. At this point, you should pass SQLLogicTests - #7 to #13. + +--- + +## Task #3 - HashJoin Executor and Optimization + +You will complete your implementation in the following files: + +* `src/include/storage/page/intermediate_result_page.h` +* `src/include/execution/executors/hash_join_executor.h` +* `src/execution/hash_join_executor.cpp` +* `src/optimizer/nlj_as_hash_join.cpp` + +### HashJoin + +The DBMS can use `HashJoinPlanNode` if a query contains a join with a conjunction of several equi-conditions between two columns. + +You will need to implement the inner join and left join for `HashJoinExecutor` using the hash join algorithm from class. The output schema of this operator is all columns from the left table followed by all columns from the right table. It is possible that the probe hash table may NOT fit entirely in memory (assuming our memory can support hash table of up to 4KB tuples). So your implementation should follow the **Grace Hash Table** algorithm discussed in lecture. + +You should design the page layout and implement the read/write methods for the `IntermediateResultPage`. It is recommended to first read Task 4 to decide if you want to use the same implementation for both tasks. + +Your implementation should correctly handle hash collisions. 
Use `GetLeftJoinKey()` and `GetRightJoinKey()` in the `HashJoinPlanNode`. + +**Hint:** Take a look at `SimpleAggregationHashTable` for hashing tuples with multiple attributes. +**Hint:** The build side of a hash join is a pipeline breaker. + +### Optimizing NestedLoopJoin to HashJoin + +Hash joins usually yield better performance than nested loop joins. You should modify the optimizer to transform a `NestedLoopJoinPlanNode` into a `HashJoinPlanNode` when possible (conjunction of equi-conditions connected by `AND`). + +```text +bustub> EXPLAIN (o) SELECT * FROM test_1 t1, test_2 t2 WHERE t1.colA = t2.colA AND t1.colB = t2.colC; + +``` + +Resulting plan: + +```text + HashJoin { type=Inner, left_key=[#0.0, #0.1], right_key=[#0.0, #0.2] } + SeqScan { table=test_1 } + SeqScan { table=test_2 } + +``` + +**Hint:** Check which table the column belongs to using `ColumnValueExpression::GetTupleIdx`. +**Hint:** Extract out keys recursively when dealing with multiple equi-conditions. + +At this point, you should pass SQLLogicTests - #14 to #15. + +--- + +# Task #4: External Merge Sort + Limit Executors + Window Functions + +You will complete your implementation in the following files: + +* `src/include/storage/page/intermediate_result_page.h` +* `src/execution/execution_common.cpp` +* `src/include/execution/executors/external_merge_sort_executor.h` +* `src/execution/external_merge_sort_executor.cpp` +* `src/include/execution/executors/limit_executor.h` +* `src/execution/limit_executor.cpp` +* `src/include/execution/executors/window_function_executor.h` +* `src/execution/window_function_executor.cpp` + +You need to implement `IndexScanExecutor` in `Task #1` before starting this task. If there is an index over a table, the query processing layer will automatically pick it for sorting. In other cases, you will need a special sort executor to do this, which, in our case, is the external merge sort executor. 
+ +The limit executor will be much easier to implement than the external merge sort executor. Therefore, feel free to implement the limit executor first if you are stuck on the external merge sort executor. You should however be aware that we won't test the limit executor without the external merge sort executor. + +For all `ORDER BY` clauses, we assume every sort key will only appear once. You do not need to worry about ties in sorting. You will also have to support sorting columns with NULL values with external merge sort. + +--- + +## External Merge Sort + +Except in the case that the `ORDER BY` attributes matches the keys of an index, BusTub will use a `SortPlanNode` for all `ORDER BY` operators. + +`EXPLAIN SELECT * FROM __mock_table_1 ORDER BY colA ASC, colB DESC NULLS FIRST;` + +This plan node does not change schema (i.e., the output schema is the same as the input schema). You can extract sort keys from `order_bys`. If the query does not include a sort direction in the `ORDER BY` clause (i.e., `ASC`, `DESC`), then the sort mode will be `default` (which is `ASC`). If the query does not specify a `NULLS FIRST` or `NULLS LAST` option in the `ORDER BY` clause, then the placement of NULL values will use `default`, which is `NULLS FIRST` for ascending order and `NULLS LAST` for descending order. + +One important assumption about the external merge sort is that the entries in a table will **NOT** be able to fit in memory. Therefore, you need to follow what you learned in the lecture: store the intermediate sorting results in temporary pages and do merge sort recursively based on the sorted results of the previous round. + +Similar to HashJoin, a specific page format will be needed for the intermediate sorting results. You should design the page layout and implement the read/write methods for the page. You may choose to reuse the same layout as the `IntermediateResultPage` used in HashJoin, or define a new one if needed. 
+
+Your implementation of `IntermediateResultPage` must be able to support sorting tuples containing `VARCHAR` attributes. To simplify your implementation of the page, the tuples containing `VARCHAR` attributes will **never be larger than the page size**, i.e. you do not have to worry about storing a single tuple across two `IntermediateResultPages`. If the current page does not fit the tuple, you can just get a new page and store it there.
+
+You will then implement the merge sort algorithm. It is not hard to do an in-memory merge sort. But when it involves the disk, you should carefully think of how you manipulate the sorted tuples via the page interfaces and the buffer pool. It is worth noting that the page that's no longer in use (i.e. from the previous round of merge sort) should be deleted, or you will have a bunch of "zombie" pages that will never be referred to but still exist in the buffer pool.
+
+It's important that you are doing the merge sort **externally** (not storing all data purely in memory). You are allowed to use `std::sort` to sort tuples fitting within one sort page, but NOT on all tuples. Also, your `IntermediateResultPage` layout should be compact, i.e. as little fragmentation as possible. You should also make sure that the pages are actually deleted after the merge sort is done.
+
+Also, we will only test on **two-way external merge sort** this semester even though `ExternalMergeSortExecutor` is templated. Therefore, feel free to implement a two-way merge sort algorithm instead of a k-way one.
+
+**Hint:** For comparison of tuples based on the sort key, we provide a helper class `TupleComparator` in `execution_common.h`. You can fill out its implementation in `execution_common.cpp`.
+
+**Hint:** To better understand what to do and where to start for `IntermediateResultPage`, you can take a look at the layout of the index pages you worked on in project 2 (e.g. a `char` array can be used as the start of page data). 
It would be helpful to think of how your executor will call the read/write methods. + +**Hint:** You don't really need to manually manipulate the "write-to-disk" part when doing the external merge sort. The buffer pool manager will handle this for you. Remember what you implemented in P1: the page guard will automatically pin and unpin a page, as well as setting the evictability. Just make proper use of the page guard and you will not be worrying too much for page manipulation in this project. + +--- + +## Limit + +The `LimitPlanNode` specifies the number of tuples that query will generate. Consider the following example: + +`EXPLAIN SELECT * FROM __mock_table_1 LIMIT 10;` + +The `LimitExecutor` constrains the number of output tuples from its child executor. If the number of tuples produced by its child executor is less than the limit specified in the plan node, this executor has no effect and yields all of the tuples that it receives. + +This plan node does not change schema (i.e., the output schema is the same as the input schema). You do **not** need to support offsets. + +--- + +## Window Functions + +In general, window functions have three parts: partition by, order by, and window frames. All three are optional, so multiple combinations of these features make the window function daunting at first. However, the conceptual model for a window function helps make it easier to understand. The conceptual model is the following: + +![](https://15445.courses.cs.cmu.edu/fall2025/project3/img/window_function_execution_model.jpg) + +* Split the data based on the conditions in the partition by clause. +* Then, in each partition, sort by the order by clause. +* Then, in each partition (now sorted), iterate over each tuple. For each tuple, we compute the boundary condition for the frame for that tuple. Each frame has a start and end (specified by the window frame clause). 
The window function is computed on the tuples in each frame, and we output what we have computed in each frame. + +The diagram below shows the general execution model of the window function. + +Let's dive deeper with a few examples using the following table: + +```sql +CREATE TABLE t (user_name VARCHAR(1), dept_name VARCHAR(16), salary INT); +INSERT INTO t VALUES ('a', 'dept1', 100); +INSERT INTO t VALUES ('b', 'dept1', 200); +INSERT INTO t VALUES ('c', 'dept1', 300); +INSERT INTO t VALUES ('e', 'dept2', 100); +INSERT INTO t VALUES ('d', 'dept2', 50); +INSERT INTO t VALUES ('f', 'dept2', 60); + +``` + +**Example #1** The below example calculates a moving average of the salary for each department. You can consider it as first sort the rows for each partition by name and then calculate the average of the row before the current row, current row and the row after current row. + +```sql +bustub> SELECT user_name, dept_name, AVG(salary) OVER \ + (PARTITION BY dept_name ORDER BY user_name ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING) \ + FROM t; ++-----------+-----------+-----------------------------+ +| user_name | dept_name | salary | ++-----------+-----------+-----------------------------+ +| a | dept1 | 150 | +| b | dept1 | 200 | +| c | dept1 | 250 | +| d | dept2 | 75 | +| e | dept2 | 70 | +| f | dept2 | 80 | ++-----------+-----------+-----------------------------+ + +``` + +**Example #2** The query below calculates a moving average of the salary for each department. Different from previous example, when window frames are omitted and order by clauses not omitted, it calculates from the first row to the current row for each partition. 
+ +```sql +bustub> SELECT user_name, dept_name, AVG(salary) OVER (PARTITION BY dept_name ORDER BY user_name) FROM t; ++-----------+-----------+-----------------------------+ +| user_name | dept_name | salary | ++-----------+-----------+-----------------------------+ +| a | dept1 | 100 | +| b | dept1 | 150 | +| c | dept1 | 200 | +| d | dept2 | 50 | +| e | dept2 | 75 | +| f | dept2 | 70 | ++-----------+-----------+-----------------------------+ + +``` + +**Example #3** This query show that when order by and window frames are both omitted, it calculates from the first row to the last row for each partition, which means the results within the partition should be the same. + +```sql +bustub> SELECT user_name, dept_name,AVG(salary) OVER (PARTITION BY dept_name) FROM t; ++-----------+-----------+-----------------------------+ +| user_name | dept_name | salary | ++-----------+-----------+-----------------------------+ +| a | dept1 | 200 | +| b | dept1 | 200 | +| c | dept1 | 200 | +| e | dept2 | 70 | +| d | dept2 | 70 | +| f | dept2 | 70 | ++-----------+-----------+-----------------------------+ +bustub> SELECT user_name, dept_name, AVG(salary) OVER () FROM t; ++-----------+-----------+-----------------------------+ +| user_name | dept_name | salary | ++-----------+-----------+-----------------------------+ +| a | dept1 | 135 | +| b | dept1 | 135 | +| c | dept1 | 135 | +| e | dept2 | 135 | +| d | dept2 | 135 | +| f | dept2 | 135 | ++-----------+-----------+-----------------------------+ + +``` + +For this task, you do **not** need to handle window frames. As in the above examples, you only need to implement `PARTITION BY` and `ORDER BY` clauses. You may notice that the `ORDER BY` clauses also change the order of non-window function columns. This is not necessary as the output order is not guaranteed and depends on the implementation. For simplicity, BusTub ensures that all window functions within a query have the same `ORDER BY` clauses. 
This means the following queries are **not** supported in BusTub and your implementation does not need to handle them: + +`SELECT SUM(v1) OVER (ORDER BY v1), SUM(v1) OVER (ORDER BY v2) FROM t1;` +`SELECT SUM(v1) OVER (ORDER BY v1), SUM(v2) OVER () FROM t1;` + +The test case will not check the order of output rows as long as columns within each row are matched. Therefore, you can sort the tuples first before doing the calculations when there are `ORDER BY` clauses, and do not change the order of tuples coming from the child executor when there are no order by clauses. + +You can implement the executor in the following steps: + +1. Sort the tuples as indicated in `ORDER BY`. +2. Generate the initial value for each partition +3. Combine values for each partition and record the value for each row. + +You may reuse the code from sort executors to complete step 1 and the code from aggregation executor to complete step 2 and step 3. + +Apart from aggregation functions implemented in previous tasks, you will need to implement `RANK` as well. The BusTub planner ensures that `ORDER BY` clause is not empty if `RANK` window function is present. Be aware that there might be ties and please refer to test cases for the expected behavior. + + +## Additional Information + +This section provides some additional information on other system components in BusTub that you will need to interact in order to complete this project. + +### System Catalog + +A database maintains an internal catalog to keep track of meta-data about the database. In this project, you will interact with the system catalog to query information regarding tables, indexes, and their schemas. + +The entirety of the catalog implementation is in `src/include/catalog/catalog.h` . You should pay particular attention to the member functions `Catalog::GetTable()` and `Catalog::GetIndex()`. You will use these functions in the implementation of your executors to query the catalog for tables and indexes. 
+ +### Index Updates + +For the table modification executors (**InsertExecutor**, **UpdateExecutor**, and **DeleteExecutor**) you must modify all indexes for the table targeted by the operation. You may find the `Catalog::GetTableIndexes()` function useful for querying all of the indexes defined for a particular table. Once you have the `IndexInfo` instance for each of the table's indexes, you can invoke index modification operations on the underlying index structure. + +In this project, we use your implementation of b-plus tree index from **Project #2** as the underlying data structure for all index operations. Therefore, successful completion of this project relies on a working implementation of the b-plus tree. + +### Optimizer Rule Implementation Guide + +The BusTub optimizer is a rule-based optimizer. Most optimizer rules construct optimized plans in a bottom-up way. Because the query plan has this tree structure, before applying the optimizer rules to the current plan node, you want to first recursively apply the rules to its children. + +At each plan node, you should determine if the source plan structure matches the one you are trying to optimize, and then check the attributes in that plan to see if it can be optimized into the target optimized plan structure. + +In the public BusTub repository, we already provide the implementation of several optimizer rules. Please take a look at them as reference. + +## Testing + +We will use **SQLLogicTest** to perform testing and benchmarking. To use it, + +```bash +make -j$(nproc) sqllogictest +./bin/bustub-sqllogictest ../test/sql/p3.00-primer.slt --verbose + +``` + +You can use the **bustub-sqllogictest** program to run **slt** files. Remember to recompile **sqllogictest** before doing any testing. In this project, we provide ALL test cases to you. There are no hidden tests. The test cases are located at **test/sql/** . + +--- + +## Formatting + +Your code must follow the **Google C++ Style Guide**. 
We use **Clang** to automatically check the quality of your source code. Your project grade will be **zero** if your submission fails any of these checks. + +Execute the following commands to check your syntax. The **format** target will automatically correct your code. The **check-lint** and **check-clang-tidy-p3** targets will print errors and instruct you how to fix it to conform to our style guide. + +```bash +$ make format +$ make check-lint +$ make check-clang-tidy-p3 + +``` + +--- + +## Memory Leaks + +For this project, we use **LLVM Address Sanitizer (ASAN) and Leak Sanitizer (LSAN)** to check for memory errors. To enable ASAN and LSAN, configure CMake in debug mode and run tests as you normally would. If there is memory error, you will see a memory error report. Note that macOS **only supports address sanitizer without leak sanitizer**. + +In some cases, address sanitizer might affect the usability of the debugger. In this case, you might need to disable all sanitizers by configuring the CMake project with: + +```bash +$ cmake -DCMAKE_BUILD_TYPE=Debug -DBUSTUB_SANITIZER= .. + +``` + +--- + +## Development Hints + +You can use **BUSTUB_ASSERT** for assertions in debug mode. Note that the statements within **BUSTUB_ASSERT** will NOT be executed in release mode. If you have something to assert in all cases, use **BUSTUB_ENSURE** instead. + +If you are having compilation problems, running **make clean** does not completely reset the compilation process. You will need to delete your build directory and run **cmake ..** again before you rerun **make**. 
diff --git a/benchmarks/courselab_bench/data/cmu_15-445/task_database/task4.md b/benchmarks/courselab_bench/data/cmu_15-445/task_database/task4.md new file mode 100644 index 00000000..697bfc3f --- /dev/null +++ b/benchmarks/courselab_bench/data/cmu_15-445/task_database/task4.md @@ -0,0 +1,646 @@ +# Phase 4 + +**YOU ARE ONLY ALLOWED TO MODIFY OR ADD FILES IN THE src DIRECTORY.** + +## Overview + +In this project, you will add transaction support for BusTub by implementing optimistic multi-version concurrency control (MVOCC). The project consists of four required tasks and two leaderboard benchmarks. + +* **Task #1 - Timestamps** +* **Task #2.1 - Storage Format** +* **Task #2.2 - Sequential Scan / Tuple Retrieval** +* **Task #3 - MVCC Executors** +* **Task #4 - Primary Key Index** + +This project must be completed individually (i.e., no groups). Before you start, please run `git pull public master` to pull the latest code from the public BusTub repo and then rerun `cmake` to reconfigure the Makefile. + +We would recommend reading over everything once before starting to write code. If you don't have time to do that (since this writeup is quite long), you can focus on reading the sections for just Task 1 and 2, as they contain specific guidance on implementation. This will be much faster than missing out on some information, implementing something incorrectly, and then having to start over. + +## Background + +In the previous projects, BusTub operated as a single-versioned DBMS. You will now add support for MVCC without modifying its core table storage architecture (i.e., table heaps). The high-level version of this MVCC protocol has been used in DBMSs like HyPer and DuckDB. + +The storage model in this protocol is similar to the delta table architecture discussed in the lecture. For every single tuple that is stored, the DBMS additionally stores tuple deltas in undo logs. 
The tuple in the table heap and its corresponding undo logs / deltas form a singly-linked list called the **version chain**. With this version chain, the DBMS logically "stores" every previous version of a tuple. That is, the DBMS only stores the deltas between each version and not complete versions of a tuple. + +The DBMS stores undo logs within a transaction's in-memory workspace, and transactions themselves are stored in the transaction manager's in-memory data structure. In a production system these logs would be persisted to disk, but this project does not require that for BusTub. + +You will first need to implement the **SNAPSHOT ISOLATION** isolation level for all of your transactions. You will then expand your concurrency control protocol to support the **SERIALIZABLE** isolation level in Task #4.4. + +In every test case, all transactions in the test case run at the same isolation level. All concurrent test cases are public, and all hidden test cases are single-threaded. On Gradescope, you will find a description of what each test case is doing. + +--- + +## Project Specification + +Like previous projects, we have provided classes that define the APIs you must implement. Do not modify the signatures of the predefined functions or remove predefined member variables in these classes unless indicated. If you do, our test code will not work, and you will not receive credit for the project. You may add private helper functions and member variables to these classes as needed. 
+ +Here are the list of files you will likely need to modify in this project: + +* `src/include/concurrency/transaction_manager.h` +* `src/concurrency/transaction_manager.cpp` +* `src/include/execution/execution_common.h` +* `src/execution/execution_common.cpp` +* `src/include/execution/executors/seq_scan_executor.h` +* `src/execution/seq_scan_executor.cpp` +* `src/include/execution/executors/index_scan_executor.h` +* `src/execution/index_scan_executor.cpp` +* `src/include/execution/executors/insert_executor.h` +* `src/execution/insert_executor.cpp` +* `src/include/execution/executors/update_executor.h` +* `src/execution/update_executor.cpp` +* `src/include/execution/executors/delete_executor.h` +* `src/execution/delete_executor.cpp` +* `src/include/concurrency/watermark.h` +* `src/concurrency/watermark.cpp` + +And here is a list of functions / classes that might be helpful in this project: + +* **TableHeap:** `MakeIterator`, `GetTuple`, `GetTupleMeta`, `UpdateTupleMeta`, `UpdateTupleInPlace`, `MakeIterator`, `MakeEagerIterator` (and for Task #3.6 and beyond, everything with `Lock`). +* **Tuple:** `SetRid`, `GetRid`, additional `Tuple` constructors, `Empty`, `IsTupleContentEqual`, `GetValue`. +* **Value:** `ValueFactory::Get____`, `ValueFactory::GetNullValueByType`, `CompareExactlyEquals`. +* **Schema:** `GetColumn`, `GetColumnCount`. +* **TransactionManager:** `UpdateUndoLink`, `GetUndoLink`, `GetUndoLog`, `GetUndoLogOptional`, `UpdateTupleAndUndoLink`, `GetTupleAndUndoLink` +* **Transaction:** All member functions are important, as well as the `UndoLink` and `UndoLog` structs. + +You will likely need to frequently map an optional value to something else. You can use the following syntax to write more concise code (monadic operations): `auto x = opt.has_value() ? operation(*opt) : std::nullopt;`. + +You can also use C++14 tuple unpacking syntax: `auto [meta, tuple] = iter->GetTuple();`. 
+ +The correctness of this project depends on the correctness of your implementation of Project #1 and Project #2. You can get a full score in this project without a complete implementation of Project #3, but this is because you will need to rewrite most of the access method executors you already implemented based on MVCC storage. Additionally, a working implementation of the optimizer rule that transforms a sequential scan into an index scan is required for Task 4.2. Finally, a working aggregation executor from Project #3 is required to complete the leaderboard test in this project. + +We do not provide solutions for previous projects. + +--- + +## Task #1 - Timestamps + +In BusTub, each transaction is assigned two timestamps: a read timestamp and commit timestamp. We will walk through how those timestamps are assigned. In this task, you will need to implement this on the transaction manager so that it can assign timestamps correctly to transactions. + +### 1.1 Timestamp Allocation + +When a transaction begins, it is assigned a **read timestamp** that is equal to the commit timestamp of the most recently committed transaction. At a high level, you can think of this as recording the timestamp of the latest atomic write into the database. The read timestamp determines what data can be safely and correctly read by a transaction. That is, the read timestamp determines the latest version of a tuple that the current transaction can see. + +When a transaction commits, it is assigned a monotonically-increasing **commit timestamp**. The commit timestamp determines the serialization order of the transactions. Since these commit timestamps are unique, we can also uniquely identify committed transactions by their commit timestamp. 
+
+Here is an example of a table heap and version chain after several writes / updates to 4 tuples in the table heap (A, B, C, D):
+
+![](https://15445.courses.cs.cmu.edu/fall2025/project4/img/1-1-ts.png)
+
+In this diagram, A1 refers to the first version of tuple A, and A3 refers to the third version of tuple A. A4 refers to the fourth version of tuple A, and it is also the most recent or "true" version of tuple A. B4 and C4 are actually the third version of tuples B and C respectively, and we are only notating it like this for the sake of the explanation below.
+
+The timestamps (ts=#) refer to the commit timestamps of the transactions that each of the tuples belong to. So [A1, B1, C1] belong to the transaction with commit timestamp = 1 (which we can shorthand refer to as transaction 1), and [A3, B3, D3] belong to the transaction with commit timestamp = 3 (transaction 3). The most recent committed transaction in this diagram is the transaction with commit timestamp = 4 with [A4, B4, C4].
+
+Suppose we have a transaction with an assigned read timestamp of 3. This means our transaction started after transaction 3 committed and before transaction 4 committed. Thus, our transaction is only able to observe [A3, B3, C2, D3].
+
+For [A, B, C], our transaction cannot observe [A4, B4, C4] because those versions of the tuples all have a timestamp of 4 (which is in the future relative to our read timestamp of 3). Our transaction needs to traverse the undo logs for each tuple and read the first version it encounters that has a version less than or equal to 3. For A and B, the first versions it encounters are A3 and B3. For C, the first version encountered is C2. For D, since the current version of the tuple already has a timestamp of 3, it is safe to read directly.
+
+Another example is if our read timestamp was 2, then our transaction would see only [A2, B1, C2], since D was only created at timestamp 3 (which is in the future relative to our read timestamp 2). 
+
+You will need to assign the transactions with the correct read timestamp and commit timestamp in this task. See `TransactionManager::Begin` and `TransactionManager::Commit` in `src/include/concurrency/transaction_manager.h` for more information. We have already provided the starter code for `TransactionManager::Abort`, and you do not need to change anything in `Abort` in order to get full points for Task #1.
+
+### 1.2 Watermark
+
+The **watermark** is the lowest read timestamp among all transactions that have not yet committed or aborted. If there is no such transaction, the watermark is the latest commit timestamp. The easiest way of computing the watermark is to iterate over all transactions in the transaction manager map and find the minimum `read_ts` among all in-progress transactions.
+
+However, this simple strategy is inefficient. In this task, you will need to implement an algorithm in O(log N) time complexity that computes the watermark. Please refer to `watermark.h` and `watermark.cpp` for more information. You will also need to call `Watermark::AddTxn` and `Watermark::RemoveTxn` when a transaction starts / commits / aborts.
+
+There are many ways to do this. The reference solution implements an amortized O(1) algorithm using a hash map, and there is additionally a useful container in the C++ standard library that might make an O(log N) implementation easy.
+
+You should pass all test cases in the `TxnTimestampTest` suite at this point.
+
+---
+
+## Task #2 - Storage Format and Sequential Scan
+
+BusTub stores transaction data in three places: the table heap, the transaction manager, and inside each transaction's workspace. The table heap always contains the latest tuple data. The transaction manager stores a pointer to the latest undo log for every tuple (`PageVersionInfo`). Transactions store the undo logs that they create, which record how a transaction has modified a tuple. 
+ +Below is a representation of the same diagram as above, but with the `PageVersionInfo` struct included and the undo logs located in a specific transaction's workspace. The dotted lines in the diagram are not pointers, but rather are logical connections via the transaction manager. + +![](https://15445.courses.cs.cmu.edu/fall2025/project4/img/2-1-storage-format.png) + +To retrieve a tuple at any given read timestamp, you will need to (1) fetch all modifications (aka. undo logs) that happened after the given timestamp, and (2) roll back those modifications (“undo” the undo logs) from the latest version of the tuple to recover the past version of that tuple. + +This is similar to the delta table storage model that we covered in the lectures, except that there is no physical “delta table” to store the delta records. Instead, these records are stored within the workspace of each transaction (not being persisted on the disk) so as to simplify the implementation. + +### Data Structures & Helper Functions + +This section is an introduction to guide you through the data structures you will need for tuple reconstruction and further operations. We would recommend reading this introduction together with the starter code. You come back to this section if you encounter any problems or confusion in other tasks. + +The DBMS stores `Tuple` and `TupleMeta` data in `TableHeap` (`src/include/storage/table_heap.h`). You can call helper functions like `GetTuple` or `GetTupleMeta` to get that data, `UpdateTupleInPlace` to update tuple for single-threaded test cases, and `InsertTuple` to insert tuples into the table heap (these functions are defined in `table_heap.h`). You may notice that there are also functions like `UpdateTupleInPlaceWithLockAcquired`, which are functions you will use for the concurrent tasks. + +The transaction header (`src/include/concurrency/transaction.h`) contains the classes and objects used to track a transaction's runtime behavior and state. 
+ +The `UndoLog` struct stores the information about the modification / deletion of a tuple by a transaction. Task 2.1 introduces the format of `UndoLog` in detail. The DBMS can reconstruct tuples based on these `UndoLogs`. Each transaction stores a vector of `UndoLogs` deltas of every tuple that the transaction has modified. For example, if txn1 updates tuple 1 and tuple 2, txn1 will store an `UndoLog` for both tuple 1 and tuple 2. The transaction will record both the delta between the previous versions of tuple 1 and tuple 2 as well as txn1's version of them. Your implementation should call `ModifyUndoLog` to modify an existing `UndoLog` and `AppendUndoLog` to append new `UndoLogs` (`src/include/concurrency/transaction.h`). By storing all of the `UndoLog` of a single transaction together, we can easily make updates to these modified tuples and their version info when the transaction commits or aborts. + +The `UndoLink` struct is the pointer to the `UndoLog`. We use `UndoLink` to link all the `UndoLog` for each tuple together. They are defined as below: + +```cpp +/** Represents a link to a previous version of this tuple */ +struct UndoLink { + /* Previous version can be found in which txn */ + txn_id_t prev_txn_{INVALID_TXN_ID}; + /* The log index of the previous version in `prev_txn_` */ + int prev_log_idx_{0}; +}; + +``` + +An `UndoLink { prev_txn_: txn5, prev_log_idx_: i }` points to the ith `UndoLog` in txn5's `undo_logs_` buffer. You can get the target `UndoLog` from given `UndoLink` by calling `GetUndoLog` and `GetUndoLogOptional` (`src/concurrency/transaction_manager_impl.cpp`). If the `prev_txn_` in the `UndoLink` has an invalid transaction id, it means that the `UndoLink` is invalid and it does not point to any valid `UndoLog`, so you only want to use `GetUndoLog` when you know the `UndoLink` is valid, otherwise you can use `GetUndoLogOptional`. 
+
+### 2.1 Tuple Reconstruction
+
+In this task, you will implement the tuple reconstruction algorithm via the `ReconstructTuple` function, defined in `execution_common.cpp`. During this project, you will likely find that many functionalities can be shared by different components in the system. You can define helper functions in `execution_common.cpp`.
+
+`ReconstructTuple` takes four inputs:
+
+1. A tuple schema.
+2. A base tuple.
+3. Metadata (both stored in the table heap).
+4. A list of undo logs in order of most recent modification to oldest modification.
+
+Here is an example of reconstructing a tuple:
+![](https://15445.courses.cs.cmu.edu/fall2025/project4/img/2-2-undo-log.png)
+
+Base tuples (under "latest version in table heap") always store a value for every column in their schema (or in other words, they are complete and valid tuples). Undo logs, however, only contain the columns that were changed by an operation. Undo logs also have an `is_delete` flag that represents the deletion of the entire tuple.
+
+Both the base tuple metadata and the undo logs will have `is_delete` flags, and they will not always be equal. In Task 4.2, you will have to "insert" a tuple into an existing RID, and therefore you will need this `is_delete` flag in your `UndoLog` to perform this kind of operation (imagine inserting and deleting the same exact tuple in a cycle). An example of how the `is_delete` flag works is illustrated below. Make sure you understand that these undo logs are going backwards in time. As an exercise, try to figure out the sequence of operations that could have led to this specific version chain:
+
+![](https://15445.courses.cs.cmu.edu/fall2025/project4/img/2-2-undo-log-with-del.png)
+
+`ReconstructTuple` should apply all modifications provided to the function without looking at the timestamp in the metadata or undo logs. It does not need to access data other than the ones provided in the function parameter list. 
In other words, make sure you are not passing too many undo logs to `ReconstructTuple`. + +Below is an illustration of the structure of `UndoLog`: + +![](https://15445.courses.cs.cmu.edu/fall2025/project4/img/2-4-undo-log-format.png) + +`UndoLog` represents a partial modification to some tuple (at some point in time determined by the `ts_` field). The `modified_fields_` member in `UndoLog` is a vector of `bool` that has the same length as the number of columns in the table schema. If one of the booleans is set to true, it indicates that the corresponding field in the tuple has been updated by that `UndoLog`. For example, if the 3rd element (index 2) of the `modified_fields_` vector is set to true, then that means the third column of the tuple was updated. + +The `tuple_` field contains the partial `Tuple`, and it should have the same number of values / columns as the `modified_fields_` vector has trues. To retrieve a value from the partial tuple, you will need to construct a partial `Schema` based on the table schema and the modified fields. You can then use that partial `Schema` to extract values out of the partial `Tuple`. + +The timestamp (`ts_`) is the commit timestamp that this `UndoLog` corresponds to. We also store a link to the next `UndoLog` (`prev_version_` is stored via an `UndoLink`). If an `UndoLog` is the last in the version chain, `TxnId` (which corresponds to `prev_txn_` inside `UndoLink` in the code) will be set to `INVALID_TXN`. You can use the `prev_version_.IsValid()` helper to check this quickly. As a reminder, you do not need to use or even examine the timestamp (`ts_`) field and the previous version (`prev_version_`) field in `ReconstructTuple`, as `prev_version_` should only be used by the caller of `ReconstructTuple` to figure out what `UndoLogs` to place in the input vector. + +In the example above, we are storing tuples with four columns. This specific `UndoLog` represents a modification of fields 2 and 3. 
Try to write out pseudocode (on paper) of how to reconstruct the past tuple with these 2 specific fields changed back. Then, try to generalize that algorithm for any type of input tuple, any number of fields modified, and for any number of undo logs. Once you've done that, you can write `ReconstructTuple`! + +### 2.2 Sequential Scan (Tuple Retrieval) + +In this task, you will need to rewrite your sequential scan executor from Project #3 to support retrieving data from the past (based on the read timestamp of a transaction). + +For every tuple that your new sequential scan executor scans from the table heap, it should retrieve all of the undo logs for that tuple up to the transaction read timestamp, reconstruct the past tuple version, and then output that past tuple. You will need to implement the `CollectUndoLogs` helper function in `execution_common.cpp`. This function returns all of the undo logs you need to reconstruct the tuple with respect to the given transaction's read timestamp. + +Given the current transaction's read timestamp, there are three cases you will need to handle: + +1. **The tuple in the table heap is the most recent data relative to the read timestamp.** You can figure this out by checking the timestamp in the tuple's metadata. In this case, no undo needs to be performed, and `CollectUndoLogs` should return an empty vector. +2. **The tuple in the table heap has either been modified by another uncommitted transaction, or it is newer than the transaction read timestamp.** In this case, you will need to iterate the version chain to collect all undo logs that are after the read timestamp. +3. **The tuple in the table heap contains modifications by the current transaction.** In other words, we are reading a tuple that we ourselves have modified. An explanation of this case is below. 
+ +To support case 3 without making changes to the structure of our timestamps (which are just `int64_t`), we will use the higher bits of the timestamps as tags to represent "temporary" timestamps. In BusTub, a commit timestamp is valid if it is in between 0 and `TXN_START_ID - 1`. `TXN_START_ID` is defined as the second most significant bit of a 64-bit integer (`1 << 62`). + +If the second most significant bit of a timestamp is set to 1 (`& 1 << 62`), it means that the tuple has been modified by a transaction and that transaction has not been committed yet. We call this timestamp a **“temporary transaction timestamp”**, which is computed via `TXN_START_ID + txn_human_readable_id = temp_txn_id`. We adopt this methodology to distinguish between a committed tuple with a commit timestamp and an uncommitted tuple under some specific transaction ID. `UndoLogs` should never contain temporary transaction timestamps (we will explain in later sections). + +The reason we do not use the actual most significant bit (`1 << 63`) is so that we can continue to compare temporary timestamps in a straightforward way with `<` and `>`. Setting the most significant bit would cause timestamps to be negative. + +The first transaction ID in BusTub is `TXN_START_ID`, and IDs are monotonically increasing. Make sure you understand that transaction IDs are not the same as commit timestamps, even though both are monotonically increasing. Since `TXN_START_ID` is a large number that is hard to interpret, we will generate a human-readable id by stripping the highest bit when logging and debugging. You should not need to manually calculate the temporary timestamp for uncommitted transactions, and you can use the existing helper function `GetTransactionTempTs` to return temporary timestamp for this transaction (`src/include/concurrency/transaction.h`). + +We will use the notation `txn***`, where `***` is a human-readable ID, for representing transaction IDs. 
For example, `txn42` represents the transaction with ID `TXN_START_ID + 42`, or the transaction with a human-readable ID of 42. Suppose that the current transaction has a human-readable ID of 3, and it scans a base tuple with timestamp `TXN_START_ID + 3`. The transaction then knows that it itself was the most recent modifier of the tuple. Handling this sub-case of case 3 is equivalent to handling case 1. Think about what should happen if it sees the temporary transaction timestamp of a different uncommitted transaction. + +#### Examples + +Suppose `txn9` updates tuple A, which means tuple A's timestamp will be set to `TXN_START_ID + 9`. When `txn9` eventually commits, tuple A's timestamp will be replaced with `txn9`'s commit timestamp. In code, this will look like the following: + +1. `txn9` modifies tuple A. +2. `txn9` uses `GetTransactionTempTs` to set tuple A's timestamp to `TXN_START_ID + 9`. +3. When `txn9` commits, it replaces tuple A's timestamp with its new commit timestamp via `GetCommitTs`. +4. Other transactions can use tuple A's timestamp to distinguish between whether it has been modified by a committed or uncommitted transaction. +5. You can do this by simply comparing the timestamp with `TXN_START_ID`. + +Here's our final example for `SeqScanExecutor`. To make our illustration easier to understand, `TXN_START_ID` in the below example will be 1000 instead of (`1 << 62`). Therefore, 1009 represents the temporary transaction timestamp where the transaction has an ID of 9 (`TXN_START_ID + 9 = 1009` in this example). + +Let's take a look at the following example, where we traverse the version chain to collect the undo logs to construct the tuples that the user requests: + +![](https://15445.courses.cs.cmu.edu/fall2025/project4/img/2-3-seqscan.png) + +Suppose we have a transaction with ID 9 and read timestamp of 3. `txn9` has not yet committed (due to the presence of the temporary transaction timestamp 1009). 
The result of a sequential scan in `txn9` of the table should be: `[(A, 9), (B, 9), (C, 2), (D, 9)]`. For all of the tuples except `(C, 2)`, transaction 9 was the one that already modified them, so it does not need to traverse the undo logs. However, `(C, 2)` has a commit timestamp of 4, which is greater than our read timestamp of 3. Transaction 9 then knows to traverse the undo logs to find the first version of this tuple that has a commit timestamp less than or equal to 3.
+
+Consider some other transaction that has a read timestamp of 4. The result of a sequential scan of this transaction will be: `[(A, 3), (B, 3), (C, 4)]`. For `(A, 3)` and `(B, 3)`, the table heap contains a pending update from `txn9`, so the transaction will need to traverse the version chain to get the last update before/at timestamp 4. `(C, 4)` is the latest update at read timestamp 4. `(D, 9)` is a pending update by transaction 9, and since it does not have a version chain, we do not need to return it. In general, if there are no previous versions of a tuple at a given read timestamp, then the transaction should treat it as if the tuple does not exist.
+
+These examples are oversimplified compared with the test cases. You will also need to think about NULL data and data types other than integers when implementing `SeqScanExecutor`.
+
+Once you have finished implementing `CollectUndoLogs` and `ReconstructTuple`, it should be clear how to finish the MVCC version of `SeqScanExecutor` using these two functions. The base tuple, tuple metadata, and the first undo link belonging to the tuple can be obtained via `GetTupleAndUndoLink`.
+
+Our test cases will manually set up some transactions and the table heap content. You do not need to implement the insert executor to test your sequential scan implementation. At this point, you should pass all test cases in `TxnScanTest`.
+
+---
+
+## Task #3 - MVCC Executors
+
+In this section, you will need to implement the data modification executors. 
This includes the insert executor, delete executor, and update executor. Starting from this task, your implementation will not be compatible with Project #3, as we only support schemas of fixed-sized data types. + +### 3.1 Insert Executor + +Your insert executor implementation should be similar to the one in Project #3. You can create a new tuple in the table heap, and you will need to correctly construct the tuple's metadata. The timestamp in the table heap should be set to the transaction temporary timestamp, as described in Task 2.2. You should also add the RID to the write set via `AppendWriteSet` at this point. Here is a simple illustration of `txn9` inserting (D, 3) into the table: + +![](https://15445.courses.cs.cmu.edu/fall2025/project4/img/3-1-insert.png) + +We have provided the helper functions `UpdateTupleInPlace` and `UpdateUndoLink` to update the tuple in the table heap and the undo link respectively. These functions mimic an atomic compare-and-swap operation, where you will need to provide a check function. The pseudo code for the two functions are as below: + +```cpp +UpdateUndoLink(rid, new_undo_link, check_function) { + take the table heap lock / undo link lock + retrieve the data from table heap / undo link + call user-provided check function, if check failed, return false + update the data and return true +} + +``` + +All test cases for this task are single-threaded, and therefore you can simply pass a `nullptr` to the check parameter to skip the check and use `UpdateTupleInPlace` and `UpdateUndoLink` separately. For the future concurrent test cases, you will need to get / update both the `Tuple` and the `UndoLink` atomically so that other transactions cannot change another transaction's intermediate result. Consider the following scenario: + +1. `txn1` calls `GetTuple` to check whether it can update a tuple. +2. `txn2` modifies the tuple and its `UndoLink`. +3. `txn1` then calls `GetUndoLink` and gets the incorrect `UndoLink`. +4. 
`txn1` updates the tuple and `UndoLink` based on incorrect information. + +In this example, you might want to use `UpdateTupleAndUndoLink` and `GetTupleAndUndoLink` to get/set atomically (`src/concurrency/transaction_manager_impl.cpp`). Starting at Task 4.2, you may need to implement the check logic to detect write-write conflicts when there are multiple threads updating a tuple and its metadata / `UndoLink` concurrently. + +### 3.2 Commit + +Only one transaction is allowed to execute the `Commit` function at a time, and you should ensure this by using the `commit_mutex_` in the transaction manager. In this task, you will need to extend your `Commit` implementation in the transaction manager with transaction commit logic. Here is some rough pseudocode: + +1. Take the commit mutex. +2. Obtain a commit timestamp (you will likely need to do a `.load() + 1` instead of `.fetch_add(1)` here, think about why in relation to `Begin`). +3. Iterate through every tuple that has been modified by this transaction (via the write set) and set the timestamp of the base tuples to the commit timestamp. You will need to maintain the write set in all modification executors (insert, update, delete). +4. Set the transaction to the `COMMITTED` state. +5. Update the commit timestamp of the transaction. +6. Update `last_committed_ts_` (you can do the `.fetch_add(1)` here). + +![](https://15445.courses.cs.cmu.edu/fall2025/project4/img/3-2-commit.png) + +You should have implemented most of the above logic as a part of task 1, so you will just need to add the iterating table logic. + +#### TxnMgrDbg + +At this point, we strongly recommend that you implement the debug function `TxnMgrDbg`. This should print out the table heap content and the version chain for each tuple. If you come to us without this function written, we will ask you to implement it first. 
+ +Our test cases will call your debug function after each important operation and you can print anything you want to examine the version chain. This debug function will be incredibly helpful in debugging your implementation for future tests. An example debug function can be seen in our reference solution running in the BusTub Web Shell with the command `\dbgmvcc {table_name}` (you can find the developer console on Chrome by pressing F12). + +Our debug function is prettier than what yours needs to be. Your version can look like this (taken from `CollectUndoLogTest` in `txn_scan_test`): + +On the BusTub Web Shell, `\dbgmvcc` looks like this: + +#### Interactive Testing + +Here is an example of using the BusTub Web Shell to compare your implementation against ours. + +```bash +make -j`nproc` shell && ./bin/bustub-shell +bustub> CREATE TABLE t1(v1 int, v2 int); +bustub> INSERT INTO t1 VALUES (1, 1), (2, 2), (3, 3); +bustub> \dbgmvcc t1 -- call your `TxnMgrDbg` function to dump the version chain +bustub> BEGIN; +txn?> INSERT INTO t1 VALUES (4, 4); +txn?> \txn -1 +bustub> SELECT * FROM t1; -- the newly-inserted row should not be visible to other txns +bustub> \txn ? -- use the id you see before +txn?> COMMIT; + +``` + +You can also use the BusTub Netcat shell to start an interactive session with transactions. You will need to install `nc` (netcat) in order to use this interactive shell. + +```bash +make -j`nproc` nc-shell && ./bin/bustub-nc-shell +bustub> CREATE TABLE t1(v1 int, v2 int); +bustub> INSERT INTO t1 VALUES (1, 1), (2, 2), (3, 3); +bustub> \dbgmvcc t1 -- call your `TxnMgrDbg` function to dump the version chain +# in another terminal +nc 127.0.0.1 23333 +bustub> INSERT INTO t1 VALUES (4, 4); +# in yet another terminal +nc 127.0.0.1 23333 +bustub> SELECT * FROM t1; -- the newly-inserted row should not be visible to this txn +bustub> COMMIT; + +``` + +We provide the reference solution running in your browser in the BusTub Web Shell. 
+ +Starting from this task, all of our test cases are written in SQL. As long as your SQL query result matches the reference output, you will get full points for a test case. We do not check the exact content of your version chain, but we will check for the number of `UndoLogs` and number of table heap tuples to ensure you are maintaining the version chain correctly and efficiently. We will also use your `ReconstructTuple` to verify the correctness of your generated `UndoLogs`. + +### 3.3 Generate Undo Log + +Before implementing the update and delete executors, you will need to implement `GenerateNewUndoLog` and `GenerateUpdatedUndoLog`. Given the original base tuple and the modified target tuple, you should return the `UndoLog` that should be stored in the transaction making the modification. + +Make sure you understand the difference between `GenerateNewUndoLog` and `GenerateUpdatedUndoLog`. Suppose a transaction updates a tuple several times. We expect only one `UndoLog` for every update within a specific transaction. `GenerateNewUndoLog` is used for every first modification of each tuple. After that, `GenerateUpdatedUndoLog` is used to consolidate modifications into one `UndoLog`. + +There are three cases to consider: + +* **Update:** In this case, generate the `UndoLog` based on base tuple and target tuple. If this is not the first update within this transaction, combine it with the original `UndoLog` via `GenerateUpdatedUndoLog`. A transaction should hold at most one undo log for each RID. If a transaction needs to update a tuple twice, it should only update the base tuple and its current undo log. +* **Delete:** In this case, you need to store the entire tuple in the `UndoLog` so that the entire tuple can be reconstructed. Think about how you would achieve this with several modifications within a transaction. +* **Insert:** The insertion you implemented in Task 3.1 always creates a new tuple in the table heap with a new RID. 
In other words, you never need a `UndoLog` for an insertion. However, in Task 4.2, you might have to insert a tuple back into a deleted tuple. You will know it's an insert case when the `base_tuple` is a `nullptr`. + +You will find `Tuple::IsTupleContentEqual` and `Value::CompareExactlyEquals` useful when computing the `UndoLog`. + +### 3.4 Update & Delete Executor + +In this task, you will need to implement the logic that actually generates `UndoLogs` and updates the table heap base tuples. The update and delete executors are quite similar. + +Before updating or deleting a tuple, you will need to check for **write-write conflicts**. There are a few cases to be aware of. If a tuple is being modified by an uncommitted transaction, no other transactions are allowed to modify it. If they do, there will be a write-write conflict and the transaction conflicting with a previous transaction should be aborted. Another write-write conflict case is when a transaction A deletes a tuple and commits, and another transaction B that starts before A deletes the same tuple after transaction A has committed. The transaction state should be set to `TAINTED` when a write-write conflict is detected, and you will need to throw an `ExecutionException` in order to mark the SQL statement as failed. `ExecuteSqlTxn` will return false if there is an execution exception. At this point, we do not require you to implement the actual abort logic. The test cases in this task will not call the `Abort` function. + +Your update executor should be implemented as a pipeline breaker: it should first store all tuples from the child executor to a local buffer before writing any updates. After that, it should pull the tuples from the local buffer, compute the updated tuple, and then perform the updates on the table heap. + +At this point, all test cases are single-threaded, and therefore you do not need to think hard about race conditions that might occur during the update / delete process. 
The only condition for detecting write-write conflict is to check the timestamp of the base tuple metadata. + +![](https://15445.courses.cs.cmu.edu/fall2025/project4/img/3-4-write-conflict.png) + +Let's go through the example above, where we show the 3 different cases you will need to handle before making any changes. + +1. In case (1), `txn10` has deleted the (A, 2) tuple and has not committed yet. Suppose `txn9` has a read timestamp of 3. `txn9` can then still read the old version of the tuple (A, 2). +2. In case (2), if any other transactions other than `txn9` try to update / delete this B tuple, they will need to abort. For example, if `txn10` eventually needs to update / delete the tuple, `txn10` should be aborted with a write-write conflict. +3. In case (3), there was some other transaction that updated (C, 2) to (C, 4) with a commit timestamp of 4. `txn9` can read an old version of the tuple (C, 2). Again, if `txn9` eventually needs to update / delete the tuple, `txn9` should be aborted with a write-write conflict, because there is a newer update that happens after the transaction read timestamp. +4. There is also a 4th case, where a transaction wants to update a modification it made itself (self modification). If a tuple has already been modified by the current transaction, you should not regard this as write-write conflict. + +After checking the write-write conflict (you should write a helper function for that), you can proceed with implementing the update / delete logic: + +1. Create the undo log for the modification by `GenerateNewUndoLog` or `GenerateUpdatedUndoLog`. +2. Update the next undo link of the tuple to point to the new undo log. +3. Update the base tuple and metadata in the table heap (this step can be done with the previous step atomically using `UpdateTupleAndUndoLink`). 
+ +Here's an example illustrating a delete: + +![](https://15445.courses.cs.cmu.edu/fall2025/project4/img/3-4-delete.png) + +Here's an example illustrating an update: + +![](https://15445.courses.cs.cmu.edu/fall2025/project4/img/3-4-update.png) + +Make sure you understand these diagrams before you start implementing your executors. If you have any questions, please ask us for clarifications! + +In the example below, `txn9` first updates the tuple to (A, 4), then to (A, 5), then to (B, 5), then to (A, 5), and then finally deletes it. Throughout the process, `txn9` keeps exactly one `UndoLog` for the tuple. When we update (B, 5) to (A, 5), we could have gone all the way back to the beginning of the transaction to compute the partial update (_, 5) (since combining all of the deltas gets you from (A,3) to (A, 5)). However, we recommend simply adding modifications to the existing `UndoLog` (so that it has the full change (A, 5)), which will make it easier to handle concurrency issues. In other words, make sure you only add / update data in the undo log, and do not remove data. + +![](https://15445.courses.cs.cmu.edu/fall2025/project4/img/3-4-update-2.png) + +In this next example, `txn9` inserts a tuple, makes several modifications, and then removes it. In this case, you can directly modify the table heap tuple without generating any undo logs. + +![](https://15445.courses.cs.cmu.edu/fall2025/project4/img/3-4-update-3.png) + +We have set the commit timestamp to 0 at the end because this tuple is inserted by `txn9` and removed by `txn9`, which means that it never actually existed. If the version chain did contain undo logs, it should be set to the actual commit timestamp instead of 0 so that the undo logs can be accessed with a transaction with lower read timestamp. You could also just ignore this case and follow the usual commit logic. As long as you can read the correct data at each timestamp, this does not matter until Task #4.4. 
+ +In this project, we will always use fixed-sized types, and therefore `UpdateTupleInPlace` should always succeed without throwing an exception. + +Putting everything together for update / deletes, you should: + +1. Get the RID from the child executor. +2. Generate the updated tuple. +3. For self-modification, update the table heap tuple, and optionally the undo log in the current transaction if there is one. +4. Otherwise, generate the undo log, and link them together. + +At this point, you should pass everything in the `TxnExecutorTest` test besides the garbage collection test case. + +### 3.5 Stop-the-world Garbage Collection + +In the code we have given you, once we have added the transaction into the transaction map, we never remove it. We do this because transactions with a lower read timestamp might need to read the undo logs stored in the previous committed or aborted transactions. However, imagine if we have had thousands or even millions of transactions. It is likely that many of the past transactions have been completely overwritten by more recent ones, and we no longer need to store their undo logs. In this task, you will need to implement a simple garbage collection strategy that removes unused transactions. + +Garbage collection is triggered manually when `GarbageCollection` is called. The test cases will only call this function when all transactions are paused. Therefore, you do not need to worry too much about race conditions when doing garbage collection. In Task 1, you have already implemented an algorithm to compute the watermark (the lowest read timestamp in the system). In this task, you will need to remove all transactions that do not contain any undo logs visible to a transaction with the lowest read timestamp. 
+ +You will need to traverse the table heap and the version chain to identify undo logs that are still accessible by a transaction with the lowest read timestamp (make sure that you understand this: an undo log that is invisible to this transaction should be invisible to all transactions). If a transaction is committed / aborted, and does not contain any undo logs visible to a transaction with the lowest read timestamp, you can simply remove it from the transaction map. + +The example below illustrates the case where the watermark timestamp is 3 and we have `txn1`, `txn2`, and `txn9` committed. `txn1`'s undo logs are no longer accessible because every undo log with commit timestamp 1 has been overwritten by updates with commit timestamps less than or equal to 3. Thus we can directly remove `txn1`. `txn2`'s undo log for tuple (A, 2) is not accessible, but its undo log for tuple (C, 2) is still accessible because there has been no additional updates, so we cannot remove it right now. + +![](https://15445.courses.cs.cmu.edu/fall2025/project4/img/3-5-garbage-collection.png) + +After removing `txn1`, there will be dangling pointers to a removed undo log, as indicated in dashed lines. You **DO NOT** need to update the previous undo log to modify the dangling pointer and make it an invalid pointer, and it is fine to leave it there for this project. If everything in your implementation is correct, your sequential scan executor should never even attempt to dereference these dangling pointers, as they are below the watermark. However, we still recommend you to add some asserts in your code to ensure this will never happen. + +At this point, you should pass the `TxnExecutorTest`. + +### 3.6 Abort + +Before this task, transactions that go into the `TAINTED` state will cause other transactions to abort on the write-conflicting tuples. In this task, you are required to implement the abort logic, so that we can continue modifying the tuples when any of the transactions aborts. 
Recall that we detect write-write conflicts by checking if there is an ongoing modification to a tuple. When aborting a transaction, we should revert this change, so that other transactions can write to the tuple. + +You can choose which design you want to implement in this task. + +#### Implementation #1 + +![](https://15445.courses.cs.cmu.edu/fall2025/project4/img/5-abort-2.png) + +In this example, we are going to abort `txn9`. You can simply undo the tuple and set the table heap to the original value. This is easier to implement and will leave your version chain with two items with timestamp 3. Your sequential scan / index scan executor should correctly handle this situation after the transaction is aborted. + +With this implementation, aborted transactions will have undo logs in the version chain, and cannot be immediately reclaimed in garbage collection. + +#### Implementation #2 + +![](https://15445.courses.cs.cmu.edu/fall2025/project4/img/5-abort.png) + +In this example, aborting `txn9` will atomically link the undo link to the previous version and update the table heap. You will need to use `UpdateTupleAndUndoLink` / `GetTupleAndUndoLink` to update / read tuples and undo links atomically. With this implementation, you do not need to wait until the watermark before removing the aborted transaction from the transaction map. + +If the transaction inserts a fresh new tuple without undo logs, the abort process simply sets it to a deletion marker with ts = 0. The commit timestamp in BusTub starts from 1, and therefore setting it to 0 will be safe. + +You do not need to revert index modifications. Anything added to the index will stay there and will not be removed. You also do not need to actually remove a tuple from the table heap. If you need to revert an insertion, simply set it to a deletion marker. + +You should allow multiple threads aborting in parallel. That is, do not take the `commit_mutex` or any other locks throughout the whole function. 
+ +--- + +## Task #4 - Primary Key Index + +BusTub supports primary key indexes, which can be created in the following way: + +```sql +CREATE TABLE t1(v1 int PRIMARY KEY); +CREATE TABLE t1(v1 int, v2 int, PRIMARY KEY(v1, v2)); + +``` + +When the primary key is specified in a `CREATE TABLE` statement, BusTub will automatically create an index with its `is_primary_key` property set to true. In BusTub, a table can have at most one primary key index. Primary key indexes ensure the uniqueness of the primary key. In this task, you will need to handle primary key indexes in your MVCC executors. The test cases will not create secondary indexes using `CREATE INDEX`, and thus you do not need to maintain secondary indexes in this task. + +### 4.1 Index Insert + +You will need to modify your insert executor to correctly handle the primary key index. At the same time, you will also need to think about the case where multiple transactions are inserting the same primary key from multiple threads. Inserting into an index can be done with the following steps: + +1. First, check if the tuple already exists in the index. If it exists, abort* the transaction. +* *This only applies to Task 4.1. If you are going to implement Task 4.2, then it is possible that the index points to a deleted tuple, and in this case, you should not abort.* + + +2. You only need to set the transaction state to `TAINTED` in Task 4. `TAINTED` means that the transaction is about to be aborted, but the data has not been cleaned up yet. You do not need to implement the actual abort process until Task #3.6. The tainted transaction will leave some tuple in the table heap, and you do not need to clean it up. When another transaction inserts into the same place and detects a write-write conflict, it should still be regarded as a conflict. 
After setting the transaction to `TAINTED` state, you will also need to throw an `ExecutionException` so that `ExecuteSql` will return false and the test case will know that the transaction / SQL was aborted. +3. Next, create a tuple on the table heap with a temporary transaction timestamp. +4. After that, insert the tuple into the index. Your index should return false if the unique key constraint is violated. + +Between steps (1) and (3), it is possible that other transactions are doing the same thing. A new entry could be created in the index before the current transaction could create it. In this case, you will need to abort the transaction, and there will be a tuple in the table heap that is not referenced by any entry in the index. + +In this example, let us go through `txn9` attempting to insert A, B, and C separately (assuming the only column of the tuple is the primary key). Assume that A already exists in the index, and C has been inserted by an uncommitted transaction. We have removed the `PageVersionInfo` structure in the diagram for clarity. + +![](https://15445.courses.cs.cmu.edu/fall2025/project4/img/4-1-insert-index.png) + +1. **Inserting A:** the key already exists in the index, violating the uniqueness requirement for primary key, thus aborting the transaction. +2. **Inserting B:** as there is no conflict in the index, first create a tuple in the table heap, and then insert the RID of the newly-created tuple into the index. +3. **Inserting C:** we assume here that there is another `txn10` also trying to insert C. `txn9` first detects no conflict in the index and creates a tuple in the table heap. Then, in the background, `txn10` does (2) and (3), creating a tuple and updating the index. When `txn9` tries inserting into the index in step (4), there will be a unique key violation reported by the index, and therefore `txn9` should go into the `TAINTED` state. + +You do not need to implement the MVCC index scan executor at this point. 
Our test case will use range queries instead of equal queries to avoid the sequential scan to index scan rule being invoked, so that sequential scans will not be converted to index scans. + +Once you finish this, you should pass the first concurrent test case in this project, where we test if your implementation works correctly when multiple threads insert the same key. + +### 4.2 Index Scan, Delete, & Update + +In this task, you will need to add index support for the delete and update executors, as well as the MVCC index scan executor. + +Once an entry is created in the index, it will always point to the same RID and will **NOT** be removed even if a tuple is marked deleted. We do this so that an earlier transaction can still access the history with the index scan executor. To support this, you will need to revisit your insert executor. Consider the case that the insert executor inserts into a tuple which has been removed by the delete executor. Your implementation should update the deleted tuple instead of creating a new entry, because an index entry always points to the same RID once created. You will need to correctly handle the write-write conflict detection and unique constraint detection. + +In this example, tuple (B, 2) has been deleted by a transaction with commit timestamp 3. We **DO NOT** remove the entry from the index when a tuple is deleted, and therefore the index may point to a deletion marker, and will **ALWAYS** point to the same RID once it is there. When `txn9` inserts (B, 3) into the table with the insert executor, it should **NOT** create a new tuple. Instead, it should update the deletion marker to the inserted tuple, as if it were an update. + +![](https://15445.courses.cs.cmu.edu/fall2025/project4/img/4-2-update-index.png) + +You will also need to think about other race conditions at this point. For example, if multiple transactions are updating the `UndoLink` at the same time. 
You should correctly abort some of them and let exactly one of them proceed without losing any data. Starting from this task, you will need to use the atomic helper function `UpdateTupleAndUndoLink`/`GetTupleAndUndoLink` and pass in the check function to avoid race conditions.
+
+You should observe that, in the above example, there will be a small window of time when the table heap contains a (deleted) tuple with the same timestamp as the first undo log. Your sequential scan executor should also handle this case correctly after you have implemented updates and deletes.
+
+### 4.3 Primary Key Updates
+
+You will need to handle the case when the primary key gets updated. In this case, the update should be implemented as a delete on the original key and an insert on the new key.
+
+Let us go through the case where `txn9` is executing `UPDATE table SET col1 = col1 + 1` in order, where `col1` is the primary key. `txn9` first inserts (2, B) (along with any tuples that have new primary keys) into the table:
+
+![](https://15445.courses.cs.cmu.edu/fall2025/project4/img/4-3-update-pk.png)
+
+Now we start updating the table with `col1 = col1 + 1`, where we delete all tuples that will be updated:
+
+![](https://15445.courses.cs.cmu.edu/fall2025/project4/img/4-3-update-pk-2.png)
+
+Next, we insert the updated tuple back to the table with new primary keys:
+
+![](https://15445.courses.cs.cmu.edu/fall2025/project4/img/4-3-update-pk-3.png)
+
+Finally, we commit the changes:
+
+![](https://15445.courses.cs.cmu.edu/fall2025/project4/img/4-3-update-pk-4.png)
+
+That's all there is to it!
+
+### 4.4 Serializable Verification
+
+If a transaction runs in serializable isolation level, you will need to verify that it satisfies serializability when committing the transaction. We use OCC backward validation for serializable verification. The verification method we talked about in the lecture only applies to a static database. In BusTub, you will need to consider newly-inserted and deleted records. 
To complete the serializable verification, you will need to store the scan filter (aka. scan predicate) in the transaction each time the sequential scan executor or the index scan executor is called. You will also need to track the write set correctly. With all the information, we can do serializable verification by checking if the scan predicate (read set) intersects with the write set of transactions that start after the current transaction starts, as follows when we commit a transaction:
+
+1. You do not need to verify a read-only transaction.
+2. Collect all transactions that commit after the read timestamp of the current transaction. We call these “conflict transactions”.
+3. Collect all RIDs that are modified by conflict transactions.
+4. For each tuple, iterate through its version chain to verify if the current transaction reads any “phantom”. You can collect all undo logs up to the transaction read timestamp. Then, replay them one by one to check the intersection.
+* For each update in the version chain,
+* For **insert**, you should check if the new tuple satisfies any of the scan predicates of the current transaction. If yes, abort.
+* For **delete**, you should check if the deleted tuple satisfies any of the scan predicates of the current transaction. If yes, abort.
+* There is an edge case where a transaction inserts and then removes a tuple, which leaves a delete marker in the table heap. This should be regarded as a no-op instead of a delete.
+* For **update**, you should check both the “before image” and the “after image”. If any of them overlaps with any of the scan predicates of the current transaction, abort.
+* Consider the case that a transaction modifies a tuple but then reverts it back, which leaves an undo log that updates some columns to the same value. In this case, you should still process it as an identical update instead of ignoring it, and abort the transaction if necessary. 
+* However, if there are two transactions, where the first one modifies the value from X to Y, and then, the second one, Y to X, you should still detect the conflicts that X is changed, if there is a txn3 starting before txn1 starts and committing after txn2 commits. + + +5. If a transaction needs to be aborted in the commit phase, you should directly go through the abort logic to revert the changes, and set the transaction status to `ABORTED` instead of `TAINTED`. + +This verification method is inefficient because (1) only one transaction can enter the verification process (2) we loop over all write sets of possible-conflicting transactions and evaluate scan predicates on that. You may consider implementing parallel verification, or precision locking (attribute-level checking instead of checking the record), in leaderboard tests. + +To test your implementation using BusTub shell, + +```bash +./bin/bustub-shell +bustub> set global_isolation_level=serializable; + +``` + +For BusTub Netcat shell, + +```bash +./bin/bustub-nc-shell --serializable + +``` + +--- + +## Leaderboard Benchmark - T-NET, the Terrier NFT Exchange Network + +In a galaxy far, far away, there is a universe in which Jack Russell terriers live in a highly-civilized society. We say that the society is highly civilized, except that NFTs (non-fungible tokens) are becoming increasingly popular. One day, the terriers decide to find a database system to track their NFTs, and BusTub is one of their candidate systems. + +### Benchmark #1 - Token Transfer over T-NET / Snapshot Isolation + +Terriers transfer their NFTs over T-NET. T-NET works like bank transfers: one terrier can initiate a transfer of a number of NFTs to another terrier. For this scenario, the transactions will be running in snapshot isolation mode. 
+ +```sql +CREATE TABLE terriers(terrier int primary key, token int); +-- each transaction: transfer A tokens from X to Y +UPDATE terriers SET token = token + A WHERE terrier = X; +UPDATE terriers SET token = token - A WHERE terrier = Y; + +``` + +### Benchmark #2 - Trading-Network over T-NET / Serializable + +When transferring NFTs on T-NET, terriers will be charged for transfer fees. The transfer fees will be waived if two terriers are on the same trading network. The network is represented by an integer ID. + +```sql +CREATE TABLE terriers(terrier int primary key, token int, network int); +-- each transaction: transfer A tokens from X to Y +X_network = SELECT network FROM terriers WHERE terrier = X; +Y_network = SELECT network FROM terriers WHERE terrier = Y; +UPDATE terriers SET token = token + A * 0.97 WHERE terrier = X; -- if X_network != Y_network +UPDATE terriers SET token = token + A WHERE terrier = X; -- if X_network == Y_network +UPDATE terriers SET token = token - A WHERE terrier = Y; + +``` + +At the same time, terriers can invite others to join their network with a sign-on bonus: + +```sql +-- X invites Y to join the network +A = SELECT network FROM terriers WHERE terrier = X; +UPDATE terriers SET network = A, token = token + 1000 WHERE terrier = Y; + +``` + +Terriers can also start their own network with a network registration fee. + +```sql +-- X starts a new trading network +UPDATE terriers SET network = ?, token = token - 1000 WHERE terrier = X; + +``` + +All transactions in this benchmark will run at serializable level. + +Due to how T-NET works, it is possible that a terrier can own a negative amount of NFTs. + +You might need to implement a more fine-grained garbage collection when sequential scan is running or on transaction commit / abort. The leaderboard test will not call the stop-the-world garbage collector you have implemented in Task 3. 
Some of our test cases need to access `commit_ts` after commit, and therefore you can clear the undo buffer instead of removing the transaction from the map when doing fine-grained garbage collection instead of removing it as in stop-the-world garbage collection. + +Implementing a more efficient serializable verification (i.e., precision locking) might be helpful in leaderboard benchmarks. It might also be helpful to implement parallel serializable verification. + +You will be ranked on speed of transfers and space usage of the database system respectively. The speed of transfers is measured by the throughput of the system, and the space usage is measured by the total number of rows in table tuples and undo logs in the system. There will be a background thread collecting number of rows in the system periodically, and the space usage is computed with the maximum number of rows at any time throughout the benchmark. The final leaderboard bonus score will be computed as: `min{speed_rank_bonus+space_rank_bonus, leaderboard_maximum_bonus}`. For each ranking, you will get 25 points for the 1st place, 15 points for 2nd-10th place, and 5 points for 11th-20th place. + + +## Instructions + +### Formatting + +Your code must follow the Google C++ Style Guide. We use Clang to automatically check the quality of your source code. Your project grade will be zero if your submission fails any of these checks. + +Execute the following commands to check your syntax. The `format` target will automatically correct your code. The `check-lint` and `check-clang-tidy-p4` targets will print errors and instruct you how to fix it to conform to our style guide. + +```bash +$ make format +$ make check-lint +$ make check-clang-tidy-p4 + +``` + +### Memory Leaks + +For this project, we use LLVM Address Sanitizer (ASAN) and Leak Sanitizer (LSAN) to check for memory errors. To enable ASAN and LSAN, configure CMake in debug mode and run tests as you normally would. 
If there is memory error, you will see a memory error report. Note that macOS only supports address sanitizer without leak sanitizer. + +In some cases, address sanitizer might affect the usability of the debugger. In this case, you might need to disable all sanitizers by configuring the CMake project with: + +```bash +$ cmake -DCMAKE_BUILD_TYPE=Debug -DBUSTUB_SANITIZER= .. + +``` + +### Development Hints + +* You can use `BUSTUB_ASSERT` for assertions in debug mode. The statements within `BUSTUB_ASSERT` will NOT be executed in release mode. If you have something to assert in all cases, use `BUSTUB_ENSURE` instead. +* We encourage you to use a graphical debugger to debug your project if you are having problems. +* If you are having compilation problems, running `make clean` does not completely reset the compilation process. You will need to delete your build directory and run `cmake ..` again before you rerun `make`. +* Post all of your questions about this project on Piazza. Do not email the TAs directly with questions. +