Skip to content

Commit 9f449a0

Browse files
committed
refactor: hard-delete dataset splits (#9668)
1 parent 248bef0 commit 9f449a0

File tree

6 files changed

+132
-178
lines changed

6 files changed

+132
-178
lines changed

scripts/ddl/postgresql_schema.sql

Lines changed: 118 additions & 129 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,44 @@ CREATE TABLE public.dataset_splits (
2828
metadata JSONB NOT NULL,
2929
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
3030
updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
31-
deleted_at TIMESTAMP WITH TIME ZONE,
32-
CONSTRAINT pk_dataset_splits PRIMARY KEY (id)
31+
CONSTRAINT pk_dataset_splits PRIMARY KEY (id),
32+
CONSTRAINT uq_dataset_splits_name
33+
UNIQUE (name)
34+
);
35+
36+
37+
-- Table: datasets
38+
-- ---------------
39+
CREATE TABLE public.datasets (
40+
id serial NOT NULL,
41+
name VARCHAR NOT NULL,
42+
description VARCHAR,
43+
metadata JSONB NOT NULL,
44+
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
45+
updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
46+
CONSTRAINT pk_datasets PRIMARY KEY (id),
47+
CONSTRAINT uq_datasets_name
48+
UNIQUE (name)
49+
);
50+
51+
52+
-- Table: dataset_versions
53+
-- -----------------------
54+
CREATE TABLE public.dataset_versions (
55+
id serial NOT NULL,
56+
dataset_id INTEGER NOT NULL,
57+
description VARCHAR,
58+
metadata JSONB NOT NULL,
59+
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
60+
CONSTRAINT pk_dataset_versions PRIMARY KEY (id),
61+
CONSTRAINT fk_dataset_versions_dataset_id_datasets FOREIGN KEY
62+
(dataset_id)
63+
REFERENCES public.datasets (id)
64+
ON DELETE CASCADE
3365
);
3466

35-
CREATE UNIQUE INDEX ix_dataset_splits_check_unique_name ON public.dataset_splits
36-
USING btree (name) WHERE (deleted_at IS NULL);
67+
CREATE INDEX ix_dataset_versions_dataset_id ON public.dataset_versions
68+
USING btree (dataset_id);
3769

3870

3971
-- Table: generative_models
@@ -299,6 +331,87 @@ CREATE INDEX ix_spans_trace_rowid ON public.spans
299331
USING btree (trace_rowid);
300332

301333

334+
-- Table: dataset_examples
335+
-- -----------------------
336+
CREATE TABLE public.dataset_examples (
337+
id serial NOT NULL,
338+
dataset_id INTEGER NOT NULL,
339+
span_rowid INTEGER,
340+
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
341+
CONSTRAINT pk_dataset_examples PRIMARY KEY (id),
342+
CONSTRAINT fk_dataset_examples_dataset_id_datasets FOREIGN KEY
343+
(dataset_id)
344+
REFERENCES public.datasets (id)
345+
ON DELETE CASCADE,
346+
CONSTRAINT fk_dataset_examples_span_rowid_spans FOREIGN KEY
347+
(span_rowid)
348+
REFERENCES public.spans (id)
349+
ON DELETE SET NULL
350+
);
351+
352+
CREATE INDEX ix_dataset_examples_dataset_id ON public.dataset_examples
353+
USING btree (dataset_id);
354+
CREATE INDEX ix_dataset_examples_span_rowid ON public.dataset_examples
355+
USING btree (span_rowid);
356+
357+
358+
-- Table: dataset_example_revisions
359+
-- --------------------------------
360+
CREATE TABLE public.dataset_example_revisions (
361+
id serial NOT NULL,
362+
dataset_example_id INTEGER NOT NULL,
363+
dataset_version_id INTEGER NOT NULL,
364+
input JSONB NOT NULL,
365+
output JSONB NOT NULL,
366+
metadata JSONB NOT NULL,
367+
revision_kind VARCHAR NOT NULL,
368+
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
369+
CONSTRAINT pk_dataset_example_revisions PRIMARY KEY (id),
370+
CONSTRAINT uq_dataset_example_revisions_dataset_example_id_dataset_bbf2
371+
UNIQUE (dataset_example_id, dataset_version_id),
372+
CHECK (((revision_kind)::text = ANY ((ARRAY[
373+
'CREATE'::character varying,
374+
'PATCH'::character varying,
375+
'DELETE'::character varying
376+
])::text[]))),
377+
CONSTRAINT fk_dataset_example_revisions_dataset_example_id_dataset_c72a
378+
FOREIGN KEY
379+
(dataset_example_id)
380+
REFERENCES public.dataset_examples (id)
381+
ON DELETE CASCADE,
382+
CONSTRAINT fk_dataset_example_revisions_dataset_version_id_dataset_3a56
383+
FOREIGN KEY
384+
(dataset_version_id)
385+
REFERENCES public.dataset_versions (id)
386+
ON DELETE CASCADE
387+
);
388+
389+
CREATE INDEX ix_dataset_example_revisions_dataset_version_id ON public.dataset_example_revisions
390+
USING btree (dataset_version_id);
391+
392+
393+
-- Table: dataset_splits_dataset_examples
394+
-- --------------------------------------
395+
CREATE TABLE public.dataset_splits_dataset_examples (
396+
dataset_split_id BIGINT NOT NULL,
397+
dataset_example_id BIGINT NOT NULL,
398+
CONSTRAINT pk_dataset_splits_dataset_examples PRIMARY KEY (dataset_split_id, dataset_example_id),
399+
CONSTRAINT fk_dataset_splits_dataset_examples_dataset_example_id_d_63b2
400+
FOREIGN KEY
401+
(dataset_example_id)
402+
REFERENCES public.dataset_examples (id)
403+
ON DELETE CASCADE,
404+
CONSTRAINT fk_dataset_splits_dataset_examples_dataset_split_id_dat_a90c
405+
FOREIGN KEY
406+
(dataset_split_id)
407+
REFERENCES public.dataset_splits (id)
408+
ON DELETE CASCADE
409+
);
410+
411+
CREATE INDEX ix_dataset_splits_dataset_examples_dataset_example_id ON public.dataset_splits_dataset_examples
412+
USING btree (dataset_example_id);
413+
414+
302415
-- Table: span_costs
303416
-- -----------------
304417
CREATE TABLE public.span_costs (
@@ -452,131 +565,6 @@ CREATE TABLE public.dataset_labels (
452565
);
453566

454567

455-
-- Table: datasets
456-
-- ---------------
457-
CREATE TABLE public.datasets (
458-
id serial NOT NULL,
459-
name VARCHAR NOT NULL,
460-
description VARCHAR,
461-
metadata JSONB NOT NULL,
462-
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
463-
updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
464-
user_id INTEGER,
465-
CONSTRAINT pk_datasets PRIMARY KEY (id),
466-
CONSTRAINT uq_datasets_name
467-
UNIQUE (name),
468-
CONSTRAINT fk_datasets_user_id_users FOREIGN KEY
469-
(user_id)
470-
REFERENCES public.users (id)
471-
ON DELETE SET NULL
472-
);
473-
474-
475-
-- Table: dataset_examples
476-
-- -----------------------
477-
CREATE TABLE public.dataset_examples (
478-
id serial NOT NULL,
479-
dataset_id INTEGER NOT NULL,
480-
span_rowid INTEGER,
481-
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
482-
CONSTRAINT pk_dataset_examples PRIMARY KEY (id),
483-
CONSTRAINT fk_dataset_examples_dataset_id_datasets FOREIGN KEY
484-
(dataset_id)
485-
REFERENCES public.datasets (id)
486-
ON DELETE CASCADE,
487-
CONSTRAINT fk_dataset_examples_span_rowid_spans FOREIGN KEY
488-
(span_rowid)
489-
REFERENCES public.spans (id)
490-
ON DELETE SET NULL
491-
);
492-
493-
CREATE INDEX ix_dataset_examples_dataset_id ON public.dataset_examples
494-
USING btree (dataset_id);
495-
CREATE INDEX ix_dataset_examples_span_rowid ON public.dataset_examples
496-
USING btree (span_rowid);
497-
498-
499-
-- Table: dataset_splits_dataset_examples
500-
-- --------------------------------------
501-
CREATE TABLE public.dataset_splits_dataset_examples (
502-
dataset_split_id BIGINT NOT NULL,
503-
dataset_example_id BIGINT NOT NULL,
504-
CONSTRAINT pk_dataset_splits_dataset_examples PRIMARY KEY (dataset_split_id, dataset_example_id),
505-
CONSTRAINT fk_dataset_splits_dataset_examples_dataset_example_id_d_63b2
506-
FOREIGN KEY
507-
(dataset_example_id)
508-
REFERENCES public.dataset_examples (id)
509-
ON DELETE CASCADE,
510-
CONSTRAINT fk_dataset_splits_dataset_examples_dataset_split_id_dat_a90c
511-
FOREIGN KEY
512-
(dataset_split_id)
513-
REFERENCES public.dataset_splits (id)
514-
ON DELETE CASCADE
515-
);
516-
517-
CREATE INDEX ix_dataset_splits_dataset_examples_dataset_example_id ON public.dataset_splits_dataset_examples
518-
USING btree (dataset_example_id);
519-
520-
521-
-- Table: dataset_versions
522-
-- -----------------------
523-
CREATE TABLE public.dataset_versions (
524-
id serial NOT NULL,
525-
dataset_id INTEGER NOT NULL,
526-
description VARCHAR,
527-
metadata JSONB NOT NULL,
528-
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
529-
user_id INTEGER,
530-
CONSTRAINT pk_dataset_versions PRIMARY KEY (id),
531-
CONSTRAINT fk_dataset_versions_dataset_id_datasets FOREIGN KEY
532-
(dataset_id)
533-
REFERENCES public.datasets (id)
534-
ON DELETE CASCADE,
535-
CONSTRAINT fk_dataset_versions_user_id_users FOREIGN KEY
536-
(user_id)
537-
REFERENCES public.users (id)
538-
ON DELETE SET NULL
539-
);
540-
541-
CREATE INDEX ix_dataset_versions_dataset_id ON public.dataset_versions
542-
USING btree (dataset_id);
543-
544-
545-
-- Table: dataset_example_revisions
546-
-- --------------------------------
547-
CREATE TABLE public.dataset_example_revisions (
548-
id serial NOT NULL,
549-
dataset_example_id INTEGER NOT NULL,
550-
dataset_version_id INTEGER NOT NULL,
551-
input JSONB NOT NULL,
552-
output JSONB NOT NULL,
553-
metadata JSONB NOT NULL,
554-
revision_kind VARCHAR NOT NULL,
555-
created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT now(),
556-
CONSTRAINT pk_dataset_example_revisions PRIMARY KEY (id),
557-
CONSTRAINT uq_dataset_example_revisions_dataset_example_id_dataset_bbf2
558-
UNIQUE (dataset_example_id, dataset_version_id),
559-
CHECK (((revision_kind)::text = ANY ((ARRAY[
560-
'CREATE'::character varying,
561-
'PATCH'::character varying,
562-
'DELETE'::character varying
563-
])::text[]))),
564-
CONSTRAINT fk_dataset_example_revisions_dataset_example_id_dataset_c72a
565-
FOREIGN KEY
566-
(dataset_example_id)
567-
REFERENCES public.dataset_examples (id)
568-
ON DELETE CASCADE,
569-
CONSTRAINT fk_dataset_example_revisions_dataset_version_id_dataset_3a56
570-
FOREIGN KEY
571-
(dataset_version_id)
572-
REFERENCES public.dataset_versions (id)
573-
ON DELETE CASCADE
574-
);
575-
576-
CREATE INDEX ix_dataset_example_revisions_dataset_version_id ON public.dataset_example_revisions
577-
USING btree (dataset_version_id);
578-
579-
580568
-- Table: datasets_dataset_labels
581569
-- ------------------------------
582570
CREATE TABLE public.datasets_dataset_labels (
@@ -751,6 +739,7 @@ CREATE TABLE public.experiment_tags (
751739
user_id INTEGER,
752740
name VARCHAR NOT NULL,
753741
description VARCHAR,
742+
color VARCHAR NOT NULL,
754743
CONSTRAINT pk_experiment_tags PRIMARY KEY (id),
755744
CONSTRAINT uq_experiment_tags_dataset_id_name
756745
UNIQUE (dataset_id, name),

src/phoenix/db/migrations/versions/deb2c81c0bb2_dataset_splits.py

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def upgrade() -> None:
5656
op.create_table(
5757
"dataset_splits",
5858
sa.Column("id", _Integer, primary_key=True),
59-
sa.Column("name", sa.String, nullable=False),
59+
sa.Column("name", sa.String, nullable=False, unique=True),
6060
sa.Column("description", sa.String, nullable=True),
6161
sa.Column("color", sa.String, nullable=False),
6262
sa.Column("metadata", JSON_, nullable=False),
@@ -73,21 +73,6 @@ def upgrade() -> None:
7373
server_default=sa.func.now(),
7474
onupdate=sa.func.now(),
7575
),
76-
sa.Column(
77-
"deleted_at",
78-
sa.TIMESTAMP(timezone=True),
79-
nullable=True,
80-
server_default=None,
81-
),
82-
)
83-
84-
op.create_index(
85-
"ix_dataset_splits_check_unique_name",
86-
"dataset_splits",
87-
["name"],
88-
unique=True,
89-
postgresql_where=sa.text("deleted_at IS NULL"),
90-
sqlite_where=sa.text("deleted_at IS NULL"),
9176
)
9277

9378
# Create crosswalk table: dataset_splits_dataset_examples

src/phoenix/db/models.py

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1230,19 +1230,14 @@ class DatasetExampleRevision(HasId):
12301230

12311231
class DatasetSplit(HasId):
12321232
__tablename__ = "dataset_splits"
1233-
name: Mapped[str] = mapped_column(String, nullable=False)
1233+
name: Mapped[str] = mapped_column(String, nullable=False, unique=True)
12341234
description: Mapped[Optional[str]]
12351235
color: Mapped[str] = mapped_column(String, nullable=False)
12361236
metadata_: Mapped[dict[str, Any]] = mapped_column("metadata")
12371237
created_at: Mapped[datetime] = mapped_column(UtcTimeStamp, server_default=func.now())
12381238
updated_at: Mapped[datetime] = mapped_column(
12391239
UtcTimeStamp, server_default=func.now(), onupdate=func.now()
12401240
)
1241-
deleted_at: Mapped[Optional[datetime]] = mapped_column(
1242-
UtcTimeStamp,
1243-
nullable=True,
1244-
server_default=None,
1245-
)
12461241
dataset_splits_dataset_examples: Mapped[list["DatasetSplitDatasetExample"]] = relationship(
12471242
"DatasetSplitDatasetExample",
12481243
back_populates="dataset_split",
@@ -1252,16 +1247,6 @@ class DatasetSplit(HasId):
12521247
back_populates="dataset_split",
12531248
)
12541249

1255-
__table_args__ = (
1256-
Index(
1257-
"ix_dataset_splits_check_unique_name",
1258-
"name",
1259-
postgresql_where=sa.text("deleted_at IS NULL"),
1260-
sqlite_where=sa.text("deleted_at IS NULL"),
1261-
unique=True,
1262-
),
1263-
)
1264-
12651250

12661251
class DatasetSplitDatasetExample(Base):
12671252
__tablename__ = "dataset_splits_dataset_examples"

src/phoenix/server/api/dataloaders/dataset_example_splits.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ async def _load_fn(self, keys: list[Key]) -> list[Result]:
3232
),
3333
)
3434
.where(models.DatasetSplitDatasetExample.dataset_example_id.in_(example_ids))
35-
.where(models.DatasetSplit.deleted_at.is_(None))
3635
):
3736
if example_id not in splits:
3837
splits[example_id] = []

0 commit comments

Comments
 (0)