818 changes: 818 additions & 0 deletions ARENA_Balanced_Brackets.py

Large diffs are not rendered by default.

1,722 changes: 1,722 additions & 0 deletions ARENA_Indirect_Object_Identification.py

Large diffs are not rendered by default.

115 changes: 115 additions & 0 deletions ARENA_files/arena_balanced_bracket_classifier_datasets.py
@@ -0,0 +1,115 @@
# %%

from typing import Union

import torch as t
from jaxtyping import Int
from torch import Tensor

device = t.device("cpu")

MAIN = __name__ == "__main__"
# %%


class SimpleTokenizer:
START_TOKEN = 0
PAD_TOKEN = 1
END_TOKEN = 2
base_d = {"[start]": START_TOKEN, "[pad]": PAD_TOKEN, "[end]": END_TOKEN}

def __init__(self, alphabet: str):
self.alphabet = alphabet
# the 3 is because there are 3 special tokens (defined just above)
self.t_to_i = {**{c: i + 3 for i, c in enumerate(alphabet)}, **self.base_d}
self.i_to_t = {i: c for c, i in self.t_to_i.items()}

    def tokenize(self, strs: Union[str, list[str]], max_len=None) -> Int[Tensor, "batch seq"]:
def c_to_int(c: str) -> int:
if c in self.t_to_i:
return self.t_to_i[c]
else:
raise ValueError(c)

if isinstance(strs, str):
strs = [strs]

if max_len is None:
            max_len = max(max(len(s) for s in strs), 1)

ints = [
[self.START_TOKEN]
+ [c_to_int(c) for c in s]
+ [self.END_TOKEN]
+ [self.PAD_TOKEN] * (max_len - len(s))
for s in strs
]
return t.tensor(ints)

    def decode(self, tokens: Int[Tensor, "batch seq"]) -> list[str]:
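        # Convert token ids back to strings, dropping the [start], [end], and [pad] tokens.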
assert tokens.ndim >= 2, "Need to have a batch dimension"

def int_to_c(c: int) -> str:
            if c in self.i_to_t:
return self.i_to_t[c]
else:
raise ValueError(c)

return [
"".join(
int_to_c(i.item()) for i in seq[1:] if i != self.PAD_TOKEN and i != self.END_TOKEN
)
for seq in tokens
]

def __repr__(self) -> str:
return f"SimpleTokenizer({self.alphabet!r})"

# %%

tokenizer = SimpleTokenizer("()")
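
# A quick round-trip check (a minimal sketch, not part of the original file):
# `tokenize` wraps each string in [start]/[end] and right-pads with [pad];
# `decode` strips those special tokens back off.
if MAIN:
    toks = tokenizer.tokenize(["()", "(())"])
    print(toks)  # tensor([[0, 3, 4, 2, 1, 1], [0, 3, 3, 4, 4, 2]])
    print(tokenizer.decode(toks))  # ['()', '(())']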

class BracketsDataset:
"""A dataset containing sequences, is_balanced labels, and tokenized sequences"""

    def __init__(self, data_tuples: list[tuple[str, bool]]):
        """
        data_tuples is a list of (sequence, is_balanced) pairs.
        """
self.tokenizer = SimpleTokenizer("()")
self.strs = [x[0] for x in data_tuples]
self.isbal = t.tensor([x[1] for x in data_tuples])
self.toks = self.tokenizer.tokenize(self.strs)
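        # summary statistics for each sequence: fraction of open brackets, and whether it starts with "("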
self.open_proportion = t.tensor([s.count("(") / len(s) for s in self.strs])
self.starts_open = t.tensor([s[0] == "(" for s in self.strs]).bool()

def __len__(self) -> int:
return len(self.strs)

def __getitem__(self, idx) -> "BracketsDataset | tuple[str, t.Tensor, t.Tensor]":
if isinstance(idx, slice):
return self.__class__(list(zip(self.strs[idx], self.isbal[idx])))
return (self.strs[idx], self.isbal[idx], self.toks[idx])

def to(self, device) -> "BracketsDataset":
self.isbal = self.isbal.to(device)
self.toks = self.toks.to(device)
self.open_proportion = self.open_proportion.to(device)
self.starts_open = self.starts_open.to(device)
return self

@property
def seq_length(self) -> int:
return self.toks.size(-1)

@classmethod
def with_length(
cls, data_tuples: list[tuple[str, bool]], selected_len: int
) -> "BracketsDataset":
return cls([(s, b) for (s, b) in data_tuples if len(s) == selected_len])

@classmethod
def with_start_char(
cls, data_tuples: list[tuple[str, bool]], start_char: str
) -> "BracketsDataset":
return cls([(s, b) for (s, b) in data_tuples if s[0] == start_char])
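
# %%

# Example usage (a minimal sketch with made-up data, not from the original repo):
if MAIN:
    data_tuples = [("()", True), ("(()", False), ("()()", True), (")(", False)]
    dataset = BracketsDataset(data_tuples).to(device)
    print(len(dataset))  # 4
    print(dataset.seq_length)  # 6: longest string (4) plus [start] and [end]
    print(BracketsDataset.with_length(data_tuples, 2).strs)  # ['()', ')(']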
32 changes: 32 additions & 0 deletions ARENA_files/arena_part41_ioi_tests.py
@@ -0,0 +1,32 @@

import sys
from pathlib import Path
from typing import Callable

import torch as t

# Make sure exercises are in the path
if str(exercises_dir := Path(__file__).parent.parent) not in sys.path:
sys.path.append(str(exercises_dir))


def test_logits_to_ave_logit_diff(logits_to_ave_logit_diff: Callable):
batch = 4
seq = 5
d_vocab = 6
logits = t.randn(batch, seq, d_vocab)
answer_tokens = t.randint(0, d_vocab, (batch, 2))

actual = logits_to_ave_logit_diff(logits, answer_tokens, per_prompt=True)
# expected = solutions.logits_to_ave_logit_diff(logits, answer_tokens, per_prompt=True)
final_logits = logits[:, -1, :]
answer_logits = final_logits.gather(dim=-1, index=answer_tokens)
correct_logits, incorrect_logits = answer_logits.unbind(dim=-1)
expected = correct_logits - incorrect_logits
t.testing.assert_close(actual, expected)

actual = logits_to_ave_logit_diff(logits, answer_tokens)
# expected = solutions.logits_to_ave_logit_diff(logits, answer_tokens)
t.testing.assert_close(actual, expected.mean())

print("All tests in `test_logits_to_ave_logit_diff` passed!")