From 0a5a238aad8e5d64c9609faccfb800c8ffa1cd2a Mon Sep 17 00:00:00 2001 From: Bhagya Amarasinghe Date: Mon, 8 Jun 2026 22:29:50 +0530 Subject: [PATCH 1/4] feat: add taxonomy schema migration --- migrations/009_taxonomy_schema.sql | 199 +++++++++++++++++++++++++ tests/taxonomy_schema_test.go | 232 +++++++++++++++++++++++++++++ 2 files changed, 431 insertions(+) create mode 100644 migrations/009_taxonomy_schema.sql create mode 100644 tests/taxonomy_schema_test.go diff --git a/migrations/009_taxonomy_schema.sql b/migrations/009_taxonomy_schema.sql new file mode 100644 index 0000000..f0036c5 --- /dev/null +++ b/migrations/009_taxonomy_schema.sql @@ -0,0 +1,199 @@ +-- +goose Up +-- Taxonomy generation artifacts are stored as run-scoped Hub data. Keep them +-- separate from feedback_records so repeated generation never mutates source feedback. +CREATE TYPE taxonomy_run_status_enum AS ENUM ( + 'pending', + 'running', + 'succeeded', + 'failed', + 'canceled' +); + +CREATE TYPE taxonomy_node_type_enum AS ENUM ( + 'root', + 'branch', + 'leaf' +); + +CREATE TYPE taxonomy_node_event_type_enum AS ENUM ( + 'rename', + 'soft_remove' +); + +CREATE TABLE taxonomy_runs ( + id UUID PRIMARY KEY DEFAULT uuidv7(), + tenant_id VARCHAR(255) NOT NULL, + source_type VARCHAR(255) NOT NULL, + source_id VARCHAR(255) NOT NULL, + field_id VARCHAR(255) NOT NULL, + field_label TEXT, + status taxonomy_run_status_enum NOT NULL DEFAULT 'pending', + params JSONB NOT NULL DEFAULT '{}'::jsonb, + metrics JSONB NOT NULL DEFAULT '{}'::jsonb, + record_count INTEGER NOT NULL DEFAULT 0, + embedding_count INTEGER NOT NULL DEFAULT 0, + cluster_count INTEGER NOT NULL DEFAULT 0, + node_count INTEGER NOT NULL DEFAULT 0, + error TEXT, + started_at TIMESTAMPTZ, + finished_at TIMESTAMPTZ, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + CONSTRAINT taxonomy_runs_tenant_id_required CHECK (btrim(tenant_id) <> ''), + CONSTRAINT taxonomy_runs_source_type_required CHECK (btrim(source_type) <> ''), + CONSTRAINT taxonomy_runs_source_id_required CHECK (btrim(source_id) <> ''), + CONSTRAINT taxonomy_runs_field_id_required CHECK (btrim(field_id) <> ''), + CONSTRAINT taxonomy_runs_record_count_nonnegative CHECK (record_count >= 0), + CONSTRAINT taxonomy_runs_embedding_count_nonnegative CHECK (embedding_count >= 0), + CONSTRAINT taxonomy_runs_cluster_count_nonnegative CHECK (cluster_count >= 0), + CONSTRAINT taxonomy_runs_node_count_nonnegative CHECK (node_count >= 0), + UNIQUE (id, tenant_id, source_type, source_id, field_id) +); + +CREATE INDEX idx_taxonomy_runs_tenant_field_created_at + ON taxonomy_runs (tenant_id, source_type, source_id, field_id, created_at DESC, id); +CREATE INDEX idx_taxonomy_runs_tenant_status_created_at + ON taxonomy_runs (tenant_id, status, created_at DESC, id); + +CREATE TABLE taxonomy_clusters ( + id UUID PRIMARY KEY DEFAULT uuidv7(), + run_id UUID NOT NULL REFERENCES taxonomy_runs(id) ON DELETE CASCADE, + cluster_key INTEGER NOT NULL, + label TEXT, + llm_label TEXT, + keywords JSONB NOT NULL DEFAULT '[]'::jsonb, + size INTEGER NOT NULL DEFAULT 0, + is_outlier BOOLEAN NOT NULL DEFAULT false, + metrics JSONB NOT NULL DEFAULT '{}'::jsonb, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + CONSTRAINT taxonomy_clusters_size_nonnegative CHECK (size >= 0), + UNIQUE (run_id, cluster_key), + UNIQUE (id, run_id) +); + +CREATE INDEX idx_taxonomy_clusters_run_size + ON taxonomy_clusters (run_id, is_outlier, size DESC, id); + +CREATE TABLE taxonomy_cluster_memberships ( + id UUID PRIMARY KEY DEFAULT uuidv7(), + run_id UUID NOT NULL, + cluster_id UUID NOT NULL, + feedback_record_id UUID NOT NULL REFERENCES feedback_records(id) ON DELETE CASCADE, + confidence DOUBLE PRECISION, + distance DOUBLE PRECISION, + metadata JSONB NOT NULL DEFAULT '{}'::jsonb, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + FOREIGN KEY (cluster_id, run_id) REFERENCES taxonomy_clusters(id, run_id) ON DELETE CASCADE, + CONSTRAINT taxonomy_cluster_memberships_confidence_range + CHECK (confidence IS NULL OR (confidence >= 0 AND confidence <= 1)), + UNIQUE (run_id, feedback_record_id) +); + +CREATE INDEX idx_taxonomy_cluster_memberships_run_cluster + ON taxonomy_cluster_memberships (run_id, cluster_id, feedback_record_id); +CREATE INDEX idx_taxonomy_cluster_memberships_feedback_record + ON taxonomy_cluster_memberships (feedback_record_id, run_id); + +CREATE TABLE taxonomy_nodes ( + id UUID PRIMARY KEY DEFAULT uuidv7(), + run_id UUID NOT NULL REFERENCES taxonomy_runs(id) ON DELETE CASCADE, + parent_id UUID, + cluster_id UUID, + node_type taxonomy_node_type_enum NOT NULL, + label TEXT NOT NULL, + original_label TEXT, + description TEXT, + level INTEGER NOT NULL, + sort_order INTEGER NOT NULL DEFAULT 0, + metadata JSONB NOT NULL DEFAULT '{}'::jsonb, + removed_at TIMESTAMPTZ, + removed_by VARCHAR(255), + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + UNIQUE (id, run_id), + FOREIGN KEY (parent_id, run_id) REFERENCES taxonomy_nodes(id, run_id) ON DELETE CASCADE, + FOREIGN KEY (cluster_id, run_id) REFERENCES taxonomy_clusters(id, run_id) ON DELETE CASCADE, + CONSTRAINT taxonomy_nodes_label_required CHECK (btrim(label) <> ''), + CONSTRAINT taxonomy_nodes_level_nonnegative CHECK (level >= 0), + CONSTRAINT taxonomy_nodes_tree_shape CHECK ( + (node_type = 'root' AND parent_id IS NULL AND level = 0) + OR (node_type <> 'root' AND parent_id IS NOT NULL AND level > 0) + ) +); + +CREATE UNIQUE INDEX idx_taxonomy_nodes_one_root_per_run + ON taxonomy_nodes (run_id) + WHERE parent_id IS NULL; +CREATE INDEX idx_taxonomy_nodes_run_parent_sort + ON taxonomy_nodes (run_id, parent_id, sort_order, id); +CREATE INDEX idx_taxonomy_nodes_run_visible + ON taxonomy_nodes (run_id, parent_id, sort_order, id) + WHERE removed_at IS NULL; +CREATE INDEX idx_taxonomy_nodes_cluster + ON taxonomy_nodes (cluster_id) + WHERE cluster_id IS NOT NULL; + +CREATE TABLE taxonomy_active_runs ( + tenant_id VARCHAR(255) NOT NULL, + source_type VARCHAR(255) NOT NULL, + source_id VARCHAR(255) NOT NULL, + field_id VARCHAR(255) NOT NULL, + run_id UUID NOT NULL, + activated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + activated_by VARCHAR(255), + PRIMARY KEY (tenant_id, source_type, source_id, field_id), + FOREIGN KEY (run_id, tenant_id, source_type, source_id, field_id) + REFERENCES taxonomy_runs(id, tenant_id, source_type, source_id, field_id) + ON DELETE CASCADE, + CONSTRAINT taxonomy_active_runs_tenant_id_required CHECK (btrim(tenant_id) <> ''), + CONSTRAINT taxonomy_active_runs_source_type_required CHECK (btrim(source_type) <> ''), + CONSTRAINT taxonomy_active_runs_source_id_required CHECK (btrim(source_id) <> ''), + CONSTRAINT taxonomy_active_runs_field_id_required CHECK (btrim(field_id) <> ''), + UNIQUE (run_id) +); + +CREATE INDEX idx_taxonomy_active_runs_run + ON taxonomy_active_runs (run_id); + +CREATE TABLE taxonomy_node_events ( + id UUID PRIMARY KEY DEFAULT uuidv7(), + tenant_id VARCHAR(255) NOT NULL, + source_type VARCHAR(255) NOT NULL, + source_id VARCHAR(255) NOT NULL, + field_id VARCHAR(255) NOT NULL, + run_id UUID NOT NULL, + node_id UUID NOT NULL, + event_type taxonomy_node_event_type_enum NOT NULL, + actor_id VARCHAR(255) NOT NULL, + old_value JSONB NOT NULL DEFAULT '{}'::jsonb, + new_value JSONB NOT NULL DEFAULT '{}'::jsonb, + metadata JSONB NOT NULL DEFAULT '{}'::jsonb, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + FOREIGN KEY (run_id, tenant_id, source_type, source_id, field_id) + REFERENCES taxonomy_runs(id, tenant_id, source_type, source_id, field_id) + ON DELETE CASCADE, + FOREIGN KEY (node_id, run_id) REFERENCES taxonomy_nodes(id, run_id) ON DELETE CASCADE, + CONSTRAINT taxonomy_node_events_tenant_id_required CHECK (btrim(tenant_id) <> ''), + CONSTRAINT taxonomy_node_events_source_type_required CHECK (btrim(source_type) <> ''), + CONSTRAINT taxonomy_node_events_source_id_required CHECK (btrim(source_id) <> ''), + CONSTRAINT taxonomy_node_events_field_id_required CHECK (btrim(field_id) <> ''), + CONSTRAINT taxonomy_node_events_actor_id_required CHECK (btrim(actor_id) <> '') +); + +CREATE INDEX idx_taxonomy_node_events_tenant_created_at + ON taxonomy_node_events (tenant_id, created_at DESC, id); +CREATE INDEX idx_taxonomy_node_events_run_node_created_at + ON taxonomy_node_events (run_id, node_id, created_at DESC, id); + +-- +goose Down +DROP TABLE IF EXISTS taxonomy_node_events; +DROP TABLE IF EXISTS taxonomy_active_runs; +DROP TABLE IF EXISTS taxonomy_nodes; +DROP TABLE IF EXISTS taxonomy_cluster_memberships; +DROP TABLE IF EXISTS taxonomy_clusters; +DROP TABLE IF EXISTS taxonomy_runs; + +DROP TYPE IF EXISTS taxonomy_node_event_type_enum; +DROP TYPE IF EXISTS taxonomy_node_type_enum; +DROP TYPE IF EXISTS taxonomy_run_status_enum; diff --git a/tests/taxonomy_schema_test.go b/tests/taxonomy_schema_test.go new file mode 100644 index 0000000..e4186ed --- /dev/null +++ b/tests/taxonomy_schema_test.go @@ -0,0 +1,232 @@ +package tests + +import ( + "context" + "os" + "testing" + + "github.com/google/uuid" + "github.com/jackc/pgx/v5/pgxpool" + "github.com/stretchr/testify/require" + + "github.com/formbricks/hub/internal/config" + "github.com/formbricks/hub/pkg/database" +) + +func setupTaxonomySchemaTestDB(t *testing.T) *pgxpool.Pool { + t.Helper() + + databaseURL := os.Getenv("DATABASE_URL") + if databaseURL == "" { + databaseURL = defaultTestDatabaseURL + } + + t.Setenv("DATABASE_URL", databaseURL) + + cfg, err := config.Load() + require.NoError(t, err) + + db, err := database.NewPostgresPool(context.Background(), cfg.Database.URL, + database.WithPoolConfig(cfg.Database.PoolConfig()), + ) + require.NoError(t, err) + + t.Cleanup(db.Close) + + return db +} + +func TestTaxonomySchemaRelationshipsAndCascades(t *testing.T) { + ctx := context.Background() + db := setupTaxonomySchemaTestDB(t) + + tenantID := "taxonomy-schema-" + uuid.NewString() + sourceType := "formbricks" + sourceID := "feedback-directory-" + uuid.NewString() + fieldID := "feedback" + + t.Cleanup(func() { + _, _ = db.Exec(ctx, `DELETE FROM taxonomy_runs WHERE tenant_id = $1`, tenantID) + _, _ = db.Exec(ctx, `DELETE FROM feedback_records WHERE tenant_id = $1`, tenantID) + }) + + var feedbackRecordID uuid.UUID + + err := db.QueryRow(ctx, ` + INSERT INTO feedback_records ( + source_type, source_id, source_name, field_id, field_label, field_type, + value_text, tenant_id, submission_id + ) + VALUES ($1, $2, $3, $4, $5, $6::field_type_enum, $7, $8, $9) + RETURNING id`, + sourceType, sourceID, "Feedback Directory", fieldID, "Feedback", "text", + "Login was confusing", tenantID, "submission-"+uuid.NewString(), + ).Scan(&feedbackRecordID) + require.NoError(t, err) + + var runID uuid.UUID + + err = db.QueryRow(ctx, ` + INSERT INTO taxonomy_runs ( + tenant_id, source_type, source_id, field_id, field_label, status, + params, record_count, embedding_count + ) + VALUES ($1, $2, $3, $4, $5, $6::taxonomy_run_status_enum, $7::jsonb, 1, 1) + RETURNING id`, + tenantID, sourceType, sourceID, fieldID, "Feedback", "running", `{"min_topic_size":10}`, + ).Scan(&runID) + require.NoError(t, err) + + var clusterID uuid.UUID + + err = db.QueryRow(ctx, ` + INSERT INTO taxonomy_clusters (run_id, cluster_key, label, llm_label, keywords, size) + VALUES ($1, 1, $2, $3, $4::jsonb, 1) + RETURNING id`, + runID, "login,password", "Authentication issues", `["login","password"]`, + ).Scan(&clusterID) + require.NoError(t, err) + + _, err = db.Exec(ctx, ` + INSERT INTO taxonomy_cluster_memberships ( + run_id, cluster_id, feedback_record_id, confidence, metadata + ) + VALUES ($1, $2, $3, 0.91, $4::jsonb)`, + runID, clusterID, feedbackRecordID, `{"rank":1}`, + ) + require.NoError(t, err) + + _, err = db.Exec(ctx, ` + INSERT INTO taxonomy_cluster_memberships ( + run_id, cluster_id, feedback_record_id, confidence + ) + VALUES ($1, $2, $3, 0.90)`, + runID, clusterID, feedbackRecordID, + ) + require.Error(t, err, "a feedback record can be assigned only once per run") + + var rootID uuid.UUID + + err = db.QueryRow(ctx, ` + INSERT INTO taxonomy_nodes (run_id, node_type, label, original_label, level, sort_order) + VALUES ($1, $2::taxonomy_node_type_enum, 'Feedback', 'Feedback', 0, 0) + RETURNING id`, + runID, "root", + ).Scan(&rootID) + require.NoError(t, err) + + _, err = db.Exec(ctx, ` + INSERT INTO taxonomy_nodes (run_id, node_type, label, level, sort_order) + VALUES ($1, $2::taxonomy_node_type_enum, 'Duplicate root', 0, 1)`, + runID, "root", + ) + require.Error(t, err, "a run can have only one root node") + + _, err = db.Exec(ctx, ` + INSERT INTO taxonomy_nodes (run_id, node_type, label, level, sort_order) + VALUES ($1, $2::taxonomy_node_type_enum, 'Orphan branch', 1, 0)`, + runID, "branch", + ) + require.Error(t, err, "non-root taxonomy nodes must have a parent") + + var otherRunID uuid.UUID + + err = db.QueryRow(ctx, ` + INSERT INTO taxonomy_runs ( + tenant_id, source_type, source_id, field_id, field_label, status + ) + VALUES ($1, $2, $3, $4, $5, $6::taxonomy_run_status_enum) + RETURNING id`, + tenantID, sourceType, sourceID, fieldID, "Feedback", "pending", + ).Scan(&otherRunID) + require.NoError(t, err) + + _, err = db.Exec(ctx, ` + INSERT INTO taxonomy_nodes (run_id, parent_id, node_type, label, level, sort_order) + VALUES ($1, $2, $3::taxonomy_node_type_enum, 'Cross-run child', 1, 0)`, + otherRunID, rootID, "branch", + ) + require.Error(t, err, "taxonomy node parents must belong to the same run") + + var branchID uuid.UUID + + err = db.QueryRow(ctx, ` + INSERT INTO taxonomy_nodes ( + run_id, parent_id, node_type, label, original_label, level, sort_order + ) + VALUES ($1, $2, $3::taxonomy_node_type_enum, 'Product Access', 'Product Access', 1, 0) + RETURNING id`, + runID, rootID, "branch", + ).Scan(&branchID) + require.NoError(t, err) + + var leafID uuid.UUID + + err = db.QueryRow(ctx, ` + INSERT INTO taxonomy_nodes ( + run_id, parent_id, cluster_id, node_type, label, original_label, level, sort_order + ) + VALUES ($1, $2, $3, $4::taxonomy_node_type_enum, 'Login Problems', 'Login Problems', 2, 0) + RETURNING id`, + runID, branchID, clusterID, "leaf", + ).Scan(&leafID) + require.NoError(t, err) + + _, err = db.Exec(ctx, ` + INSERT INTO taxonomy_active_runs ( + tenant_id, source_type, source_id, field_id, run_id, activated_by + ) + VALUES ($1, $2, $3, $4, $5, $6)`, + tenantID, sourceType, sourceID, fieldID, runID, "user-1", + ) + require.NoError(t, err) + + _, err = db.Exec(ctx, ` + INSERT INTO taxonomy_active_runs ( + tenant_id, source_type, source_id, field_id, run_id, activated_by + ) + VALUES ($1, $2, $3, $4, $5, $6)`, + tenantID, sourceType, sourceID, fieldID, runID, "user-1", + ) + require.Error(t, err, "only one active run is allowed per directory field") + + _, err = db.Exec(ctx, ` + INSERT INTO taxonomy_node_events ( + tenant_id, source_type, source_id, field_id, run_id, node_id, + event_type, actor_id, old_value, new_value + ) + VALUES ( + $1, $2, $3, $4, $5, $6, + $7::taxonomy_node_event_type_enum, $8, $9::jsonb, $10::jsonb + )`, + tenantID, sourceType, sourceID, fieldID, runID, leafID, + "rename", "user-1", `{"label":"Login Problems"}`, `{"label":"Authentication Problems"}`, + ) + require.NoError(t, err) + + _, err = db.Exec(ctx, `DELETE FROM feedback_records WHERE id = $1`, feedbackRecordID) + require.NoError(t, err) + require.Equal(t, int64(0), countTaxonomyRows(t, db, ` + SELECT COUNT(*) FROM taxonomy_cluster_memberships WHERE feedback_record_id = $1`, + feedbackRecordID, + )) + + _, err = db.Exec(ctx, `DELETE FROM taxonomy_runs WHERE id = $1`, runID) + require.NoError(t, err) + + require.Equal(t, int64(0), countTaxonomyRows(t, db, `SELECT COUNT(*) FROM taxonomy_clusters WHERE run_id = $1`, runID)) + require.Equal(t, int64(0), countTaxonomyRows(t, db, `SELECT COUNT(*) FROM taxonomy_nodes WHERE run_id = $1`, runID)) + require.Equal(t, int64(0), countTaxonomyRows(t, db, `SELECT COUNT(*) FROM taxonomy_active_runs WHERE run_id = $1`, runID)) + require.Equal(t, int64(0), countTaxonomyRows(t, db, `SELECT COUNT(*) FROM taxonomy_node_events WHERE run_id = $1`, runID)) +} + +func countTaxonomyRows(t *testing.T, db *pgxpool.Pool, query string, args ...any) int64 { + t.Helper() + + var count int64 + + err := db.QueryRow(context.Background(), query, args...).Scan(&count) + require.NoError(t, err) + + return count +} From b0807c6ca623df3645c6cc9cb9420812e771a6a9 Mon Sep 17 00:00:00 2001 From: Bhagya Amarasinghe Date: Mon, 8 Jun 2026 23:06:51 +0530 Subject: [PATCH 2/4] fix: enforce taxonomy membership tenant boundaries --- migrations/009_taxonomy_schema.sql | 19 +++++++++++--- tests/taxonomy_schema_test.go | 40 ++++++++++++++++++++++++------ 2 files changed, 47 insertions(+), 12 deletions(-) diff --git a/migrations/009_taxonomy_schema.sql b/migrations/009_taxonomy_schema.sql index f0036c5..8e00130 100644 --- a/migrations/009_taxonomy_schema.sql +++ b/migrations/009_taxonomy_schema.sql @@ -1,6 +1,9 @@ --- +goose Up +-- +goose up -- Taxonomy generation artifacts are stored as run-scoped Hub data. Keep them -- separate from feedback_records so repeated generation never mutates source feedback. +ALTER TABLE feedback_records + ADD CONSTRAINT feedback_records_id_tenant_unique UNIQUE (id, tenant_id); + CREATE TYPE taxonomy_run_status_enum AS ENUM ( 'pending', 'running', @@ -47,6 +50,7 @@ CREATE TABLE taxonomy_runs ( CONSTRAINT taxonomy_runs_embedding_count_nonnegative CHECK (embedding_count >= 0), CONSTRAINT taxonomy_runs_cluster_count_nonnegative CHECK (cluster_count >= 0), CONSTRAINT taxonomy_runs_node_count_nonnegative CHECK (node_count >= 0), + UNIQUE (id, tenant_id), UNIQUE (id, tenant_id, source_type, source_id, field_id) ); @@ -78,22 +82,26 @@ CREATE INDEX idx_taxonomy_clusters_run_size CREATE TABLE taxonomy_cluster_memberships ( id UUID PRIMARY KEY DEFAULT uuidv7(), run_id UUID NOT NULL, + tenant_id VARCHAR(255) NOT NULL, cluster_id UUID NOT NULL, - feedback_record_id UUID NOT NULL REFERENCES feedback_records(id) ON DELETE CASCADE, + feedback_record_id UUID NOT NULL, confidence DOUBLE PRECISION, distance DOUBLE PRECISION, metadata JSONB NOT NULL DEFAULT '{}'::jsonb, created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + FOREIGN KEY (run_id, tenant_id) REFERENCES taxonomy_runs(id, tenant_id) ON DELETE CASCADE, FOREIGN KEY (cluster_id, run_id) REFERENCES taxonomy_clusters(id, run_id) ON DELETE CASCADE, + FOREIGN KEY (feedback_record_id, tenant_id) REFERENCES feedback_records(id, tenant_id) ON DELETE CASCADE, CONSTRAINT taxonomy_cluster_memberships_confidence_range CHECK (confidence IS NULL OR (confidence >= 0 AND confidence <= 1)), + CONSTRAINT taxonomy_cluster_memberships_tenant_id_required CHECK (btrim(tenant_id) <> ''), UNIQUE (run_id, feedback_record_id) ); CREATE INDEX idx_taxonomy_cluster_memberships_run_cluster ON taxonomy_cluster_memberships (run_id, cluster_id, feedback_record_id); CREATE INDEX idx_taxonomy_cluster_memberships_feedback_record - ON taxonomy_cluster_memberships (feedback_record_id, run_id); + ON taxonomy_cluster_memberships (tenant_id, feedback_record_id, run_id); CREATE TABLE taxonomy_nodes ( id UUID PRIMARY KEY DEFAULT uuidv7(), @@ -186,7 +194,7 @@ CREATE INDEX idx_taxonomy_node_events_tenant_created_at CREATE INDEX idx_taxonomy_node_events_run_node_created_at ON taxonomy_node_events (run_id, node_id, created_at DESC, id); --- +goose Down +-- +goose down DROP TABLE IF EXISTS taxonomy_node_events; DROP TABLE IF EXISTS taxonomy_active_runs; DROP TABLE IF EXISTS taxonomy_nodes; @@ -197,3 +205,6 @@ DROP TABLE IF EXISTS taxonomy_runs; DROP TYPE IF EXISTS taxonomy_node_event_type_enum; DROP TYPE IF EXISTS taxonomy_node_type_enum; DROP TYPE IF EXISTS taxonomy_run_status_enum; + +ALTER TABLE feedback_records + DROP CONSTRAINT IF EXISTS feedback_records_id_tenant_unique; diff --git a/tests/taxonomy_schema_test.go b/tests/taxonomy_schema_test.go index e4186ed..49f357f 100644 --- a/tests/taxonomy_schema_test.go +++ b/tests/taxonomy_schema_test.go @@ -41,13 +41,14 @@ func TestTaxonomySchemaRelationshipsAndCascades(t *testing.T) { db := setupTaxonomySchemaTestDB(t) tenantID := "taxonomy-schema-" + uuid.NewString() + otherTenantID := tenantID + "-other" sourceType := "formbricks" sourceID := "feedback-directory-" + uuid.NewString() fieldID := "feedback" t.Cleanup(func() { _, _ = db.Exec(ctx, `DELETE FROM taxonomy_runs WHERE tenant_id = $1`, tenantID) - _, _ = db.Exec(ctx, `DELETE FROM feedback_records WHERE tenant_id = $1`, tenantID) + _, _ = db.Exec(ctx, `DELETE FROM feedback_records WHERE tenant_id IN ($1, $2)`, tenantID, otherTenantID) }) var feedbackRecordID uuid.UUID @@ -64,6 +65,20 @@ func TestTaxonomySchemaRelationshipsAndCascades(t *testing.T) { ).Scan(&feedbackRecordID) require.NoError(t, err) + var otherFeedbackRecordID uuid.UUID + + err = db.QueryRow(ctx, ` + INSERT INTO feedback_records ( + source_type, source_id, source_name, field_id, field_label, field_type, + value_text, tenant_id, submission_id + ) + VALUES ($1, $2, $3, $4, $5, $6::field_type_enum, $7, $8, $9) + RETURNING id`, + sourceType, sourceID, "Feedback Directory", fieldID, "Feedback", "text", + "Billing was confusing", otherTenantID, "submission-"+uuid.NewString(), + ).Scan(&otherFeedbackRecordID) + require.NoError(t, err) + var runID uuid.UUID err = db.QueryRow(ctx, ` @@ -89,19 +104,28 @@ func TestTaxonomySchemaRelationshipsAndCascades(t *testing.T) { _, err = db.Exec(ctx, ` INSERT INTO taxonomy_cluster_memberships ( - run_id, cluster_id, feedback_record_id, confidence, metadata + run_id, tenant_id, cluster_id, feedback_record_id, confidence, metadata ) - VALUES ($1, $2, $3, 0.91, $4::jsonb)`, - runID, clusterID, feedbackRecordID, `{"rank":1}`, + VALUES ($1, $2, $3, $4, 0.91, $5::jsonb)`, + runID, tenantID, clusterID, feedbackRecordID, `{"rank":1}`, ) require.NoError(t, err) _, err = db.Exec(ctx, ` INSERT INTO taxonomy_cluster_memberships ( - run_id, cluster_id, feedback_record_id, confidence + run_id, tenant_id, cluster_id, feedback_record_id, confidence + ) + VALUES ($1, $2, $3, $4, 0.75)`, + runID, tenantID, clusterID, otherFeedbackRecordID, + ) + require.Error(t, err, "memberships cannot attach feedback records from another tenant") + + _, err = db.Exec(ctx, ` + INSERT INTO taxonomy_cluster_memberships ( + run_id, tenant_id, cluster_id, feedback_record_id, confidence ) - VALUES ($1, $2, $3, 0.90)`, - runID, clusterID, feedbackRecordID, + VALUES ($1, $2, $3, $4, 0.90)`, + runID, tenantID, clusterID, feedbackRecordID, ) require.Error(t, err, "a feedback record can be assigned only once per run") @@ -186,7 +210,7 @@ func TestTaxonomySchemaRelationshipsAndCascades(t *testing.T) { tenant_id, source_type, source_id, field_id, run_id, activated_by ) VALUES ($1, $2, $3, $4, $5, $6)`, - tenantID, sourceType, sourceID, fieldID, runID, "user-1", + tenantID, sourceType, sourceID, fieldID, otherRunID, "user-1", ) require.Error(t, err, "only one active run is allowed per directory field") From c21ef4de54256abfc63c2228b5eac4f633d48434 Mon Sep 17 00:00:00 2001 From: Bhagya Amarasinghe Date: Tue, 9 Jun 2026 22:49:12 +0530 Subject: [PATCH 3/4] fix: delete taxonomy data during tenant purge --- internal/repository/tenant_data_repository.go | 7 + .../repository/tenant_data_repository_test.go | 37 ++++- tests/integration_test.go | 136 ++++++++++++++++++ 3 files changed, 175 insertions(+), 5 deletions(-) diff --git a/internal/repository/tenant_data_repository.go b/internal/repository/tenant_data_repository.go index ccf615f..08a8fa3 100644 --- a/internal/repository/tenant_data_repository.go +++ b/internal/repository/tenant_data_repository.go @@ -99,6 +99,13 @@ func deleteTenantDataInTx( return nil, fmt.Errorf("delete tenant embeddings: %w", err) } + _, err = exec.Exec(ctx, ` + DELETE FROM taxonomy_runs + WHERE tenant_id = $1`, tenantID) + if err != nil { + return nil, fmt.Errorf("delete tenant taxonomy runs: %w", err) + } + feedbackRecordsTag, err := exec.Exec(ctx, ` DELETE FROM feedback_records WHERE tenant_id = $1`, tenantID) diff --git a/internal/repository/tenant_data_repository_test.go b/internal/repository/tenant_data_repository_test.go index c705140..86dbdcf 100644 --- a/internal/repository/tenant_data_repository_test.go +++ b/internal/repository/tenant_data_repository_test.go @@ -80,6 +80,7 @@ func TestTenantDataRepository_DeleteByTenant(t *testing.T) { fakeTenantDataExecutor: fakeTenantDataExecutor{ tags: []pgconn.CommandTag{ pgconn.NewCommandTag("DELETE 2"), + pgconn.NewCommandTag("DELETE 4"), pgconn.NewCommandTag("DELETE 3"), pgconn.NewCommandTag("DELETE 1"), }, @@ -152,6 +153,7 @@ func TestTenantDataRepository_DeleteByTenant(t *testing.T) { fakeTenantDataExecutor: fakeTenantDataExecutor{ tags: []pgconn.CommandTag{ pgconn.NewCommandTag("DELETE 2"), + pgconn.NewCommandTag("DELETE 4"), pgconn.NewCommandTag("DELETE 3"), pgconn.NewCommandTag("DELETE 1"), }, @@ -177,6 +179,7 @@ func TestDeleteTenantDataInTx(t *testing.T) { exec := &fakeTenantDataExecutor{ tags: []pgconn.CommandTag{ pgconn.NewCommandTag("DELETE 2"), + pgconn.NewCommandTag("DELETE 4"), pgconn.NewCommandTag("DELETE 3"), pgconn.NewCommandTag("DELETE 1"), }, @@ -191,14 +194,15 @@ func TestDeleteTenantDataInTx(t *testing.T) { t.Fatalf("counts = %+v, want embeddings=2 feedback=3 webhooks=1", counts) } - if len(exec.queries) != 3 { - t.Fatalf("queries = %d, want 3", len(exec.queries)) + if len(exec.queries) != 4 { + t.Fatalf("queries = %d, want 4", len(exec.queries)) } assertQueryContains(t, exec.queries[0], "DELETE FROM embeddings") assertQueryContains(t, exec.queries[0], "USING feedback_records") - assertQueryContains(t, exec.queries[1], "DELETE FROM feedback_records") - assertQueryContains(t, exec.queries[2], "DELETE FROM webhooks") + assertQueryContains(t, exec.queries[1], "DELETE FROM taxonomy_runs") + assertQueryContains(t, exec.queries[2], "DELETE FROM feedback_records") + assertQueryContains(t, exec.queries[3], "DELETE FROM webhooks") for queryIndex, args := range exec.args { if len(args) != 1 || args[0] != "org-123" { @@ -224,7 +228,7 @@ func TestDeleteTenantDataInTx(t *testing.T) { } }) - t.Run("stops before webhooks after feedback delete error", func(t *testing.T) { + t.Run("stops before feedback records after taxonomy runs delete error", func(t *testing.T) { exec := &fakeTenantDataExecutor{ tags: []pgconn.CommandTag{ pgconn.NewCommandTag("DELETE 2"), @@ -245,6 +249,29 @@ func TestDeleteTenantDataInTx(t *testing.T) { t.Fatalf("queries = %d, want 2", len(exec.queries)) } }) + + t.Run("stops before webhooks after feedback delete error", func(t *testing.T) { + exec := &fakeTenantDataExecutor{ + tags: []pgconn.CommandTag{ + pgconn.NewCommandTag("DELETE 2"), + pgconn.NewCommandTag("DELETE 4"), + }, + errAtQuery: 3, + } + + counts, err := deleteTenantDataInTx(context.Background(), exec, "org-123") + if err == nil { + t.Fatal("deleteTenantDataInTx() error = nil, want error") + } + + if counts != nil { + t.Fatalf("counts = %+v, want nil", counts) + } + + if len(exec.queries) != 3 { + t.Fatalf("queries = %d, want 3", len(exec.queries)) + } + }) } func assertQueryContains(t *testing.T, query, want string) { diff --git a/tests/integration_test.go b/tests/integration_test.go index 0c3b61a..cd9153d 100644 --- a/tests/integration_test.go +++ b/tests/integration_test.go @@ -15,6 +15,7 @@ import ( "time" "github.com/google/uuid" + "github.com/jackc/pgx/v5/pgxpool" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" @@ -1063,6 +1064,13 @@ func TestDeleteTenantData(t *testing.T) { require.NoError(t, embeddingsRepo.Upsert(ctx, tenantARecord2.ID, modelName, embedding)) require.NoError(t, embeddingsRepo.Upsert(ctx, tenantBRecord.ID, modelName, embedding)) + tenantATaxonomyRunID := createTenantDataTaxonomyGraph( + ctx, t, db, tenantA, tenantARecord1.ID, "tenant-data-delete-a-taxonomy-"+uuid.NewString(), tenantARecord1.FieldID, + ) + tenantBTaxonomyRunID := createTenantDataTaxonomyGraph( + ctx, t, db, tenantB, tenantBRecord.ID, "tenant-data-delete-b-taxonomy-"+uuid.NewString(), tenantBRecord.FieldID, + ) + deleteResp := deleteTenantData(ctx, t, client, server.URL, tenantA) assert.Equal(t, tenantA, deleteResp.TenantID) assert.Equal(t, int64(2), deleteResp.DeletedFeedbackRecords) @@ -1081,6 +1089,7 @@ func TestDeleteTenantData(t *testing.T) { ) _, err = embeddingsRepo.GetEmbeddingByFeedbackRecordAndModel(ctx, tenantARecord1.ID, modelName) require.ErrorIs(t, err, repository.ErrEmbeddingNotFound) + requireTenantDataTaxonomyRunDeleted(ctx, t, db, tenantATaxonomyRunID) requireTenantDataResourceStatus( ctx, t, client, fmt.Sprintf("%s/v1/feedback-records/%s", server.URL, tenantBRecord.ID), http.StatusOK, @@ -1088,6 +1097,7 @@ func TestDeleteTenantData(t *testing.T) { requireTenantDataResourceStatus(ctx, t, client, fmt.Sprintf("%s/v1/webhooks/%s", server.URL, tenantBWebhook.ID), http.StatusOK) _, err = embeddingsRepo.GetEmbeddingByFeedbackRecordAndModel(ctx, tenantBRecord.ID, modelName) require.NoError(t, err) + requireTenantDataTaxonomyRunPresent(ctx, t, db, tenantBTaxonomyRunID) repeatedResp := deleteTenantData(ctx, t, client, server.URL, tenantA) assert.Equal(t, int64(0), repeatedResp.DeletedFeedbackRecords) @@ -1210,6 +1220,132 @@ func createTenantDataWebhook( return webhook } +func createTenantDataTaxonomyGraph( + ctx context.Context, + t *testing.T, + db *pgxpool.Pool, + tenantID string, + feedbackRecordID uuid.UUID, + sourceID string, + fieldID string, +) uuid.UUID { + t.Helper() + + var runID uuid.UUID + + err := db.QueryRow(ctx, ` + INSERT INTO taxonomy_runs ( + tenant_id, source_type, source_id, field_id, field_label, status, + record_count, embedding_count + ) + VALUES ($1, 'formbricks', $2, $3, 'Feedback', 'succeeded'::taxonomy_run_status_enum, 1, 1) + RETURNING id`, + tenantID, sourceID, fieldID, + ).Scan(&runID) + require.NoError(t, err) + + var clusterID uuid.UUID + + err = db.QueryRow(ctx, ` + INSERT INTO taxonomy_clusters (run_id, cluster_key, label, llm_label, keywords, size) + VALUES ($1, 1, 'login', 'Login issues', '["login"]'::jsonb, 1) + RETURNING id`, + runID, + ).Scan(&clusterID) + require.NoError(t, err) + + _, err = db.Exec(ctx, ` + INSERT INTO taxonomy_cluster_memberships (run_id, tenant_id, cluster_id, feedback_record_id, confidence) + VALUES ($1, $2, $3, $4, 0.95)`, + runID, tenantID, clusterID, feedbackRecordID, + ) + require.NoError(t, err) + + var rootID uuid.UUID + + err = db.QueryRow(ctx, ` + INSERT INTO taxonomy_nodes (run_id, node_type, label, original_label, level, sort_order) + VALUES ($1, 'root'::taxonomy_node_type_enum, 'Feedback', 'Feedback', 0, 0) + RETURNING id`, + runID, + ).Scan(&rootID) + require.NoError(t, err) + + var branchID uuid.UUID + + err = db.QueryRow(ctx, ` + INSERT INTO taxonomy_nodes (run_id, parent_id, node_type, label, original_label, level, sort_order) + VALUES ($1, $2, 'branch'::taxonomy_node_type_enum, 'Product Access', 'Product Access', 1, 0) + RETURNING id`, + runID, rootID, + ).Scan(&branchID) + require.NoError(t, err) + + var leafID uuid.UUID + + err = db.QueryRow(ctx, ` + INSERT INTO taxonomy_nodes (run_id, parent_id, cluster_id, node_type, label, original_label, level, sort_order) + VALUES ($1, $2, $3, 'leaf'::taxonomy_node_type_enum, 'Login Problems', 'Login Problems', 2, 0) + RETURNING id`, + runID, branchID, clusterID, + ).Scan(&leafID) + require.NoError(t, err) + + _, err = db.Exec(ctx, ` + INSERT INTO taxonomy_active_runs (tenant_id, source_type, source_id, field_id, run_id, activated_by) + VALUES ($1, 'formbricks', $2, $3, $4, 'tenant-data-test')`, + tenantID, sourceID, fieldID, runID, + ) + require.NoError(t, err) + + _, err = db.Exec(ctx, ` + INSERT INTO taxonomy_node_events ( + tenant_id, source_type, source_id, field_id, run_id, node_id, + event_type, actor_id, old_value, new_value + ) + VALUES ( + $1, 'formbricks', $2, $3, $4, $5, + 'rename'::taxonomy_node_event_type_enum, 'tenant-data-test', + '{"label":"Login Problems"}'::jsonb, '{"label":"Authentication Problems"}'::jsonb + )`, + tenantID, sourceID, fieldID, runID, leafID, + ) + require.NoError(t, err) + + return runID +} + +func requireTenantDataTaxonomyRunDeleted(ctx context.Context, t *testing.T, db *pgxpool.Pool, runID uuid.UUID) { + t.Helper() + + assert.Equal(t, int64(0), countTenantDataRows(ctx, t, db, `SELECT COUNT(*) FROM taxonomy_runs WHERE id = $1`, runID)) + assert.Equal(t, int64(0), countTenantDataRows(ctx, t, db, `SELECT COUNT(*) FROM taxonomy_clusters WHERE run_id = $1`, runID)) + assert.Equal(t, int64(0), countTenantDataRows(ctx, t, db, `SELECT COUNT(*) FROM taxonomy_cluster_memberships WHERE run_id = $1`, runID)) + assert.Equal(t, int64(0), countTenantDataRows(ctx, t, db, `SELECT COUNT(*) FROM taxonomy_nodes WHERE run_id = $1`, runID)) + assert.Equal(t, int64(0), countTenantDataRows(ctx, t, db, `SELECT COUNT(*) FROM taxonomy_active_runs WHERE run_id = $1`, runID)) + assert.Equal(t, int64(0), countTenantDataRows(ctx, t, db, `SELECT COUNT(*) FROM taxonomy_node_events WHERE run_id = $1`, runID)) +} + +func requireTenantDataTaxonomyRunPresent(ctx context.Context, t *testing.T, db *pgxpool.Pool, runID uuid.UUID) { + t.Helper() + + assert.Equal(t, int64(1), countTenantDataRows(ctx, t, db, `SELECT COUNT(*) FROM taxonomy_runs WHERE id = $1`, runID)) + assert.Equal(t, int64(1), countTenantDataRows(ctx, t, db, `SELECT COUNT(*) FROM taxonomy_cluster_memberships WHERE run_id = $1`, runID)) + assert.Equal(t, int64(1), countTenantDataRows(ctx, t, db, `SELECT COUNT(*) FROM taxonomy_active_runs WHERE run_id = $1`, runID)) + assert.Equal(t, int64(1), countTenantDataRows(ctx, t, db, `SELECT COUNT(*) FROM taxonomy_node_events WHERE run_id = $1`, runID)) +} + +func countTenantDataRows(ctx context.Context, t *testing.T, db *pgxpool.Pool, query string, args ...any) int64 { + t.Helper() + + var count int64 + + err := db.QueryRow(ctx, query, args...).Scan(&count) + require.NoError(t, err) + + return count +} + func requireTenantDataResourceStatus( ctx context.Context, t *testing.T, From bb8f6c06b076760bffc489d3151ae9e9ed86ab6c Mon Sep 17 00:00:00 2001 From: Bhagya Amarasinghe Date: Wed, 10 Jun 2026 17:00:46 +0530 Subject: [PATCH 4/4] fix: build feedback record tenant index concurrently --- migrations/009_feedback_records_id_tenant_unique_index.sql | 7 +++++++ .../{009_taxonomy_schema.sql => 010_taxonomy_schema.sql} | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) create mode 100644 migrations/009_feedback_records_id_tenant_unique_index.sql rename migrations/{009_taxonomy_schema.sql => 010_taxonomy_schema.sql} (98%) diff --git a/migrations/009_feedback_records_id_tenant_unique_index.sql b/migrations/009_feedback_records_id_tenant_unique_index.sql new file mode 100644 index 0000000..f64917c --- /dev/null +++ b/migrations/009_feedback_records_id_tenant_unique_index.sql @@ -0,0 +1,7 @@ +-- +goose NO TRANSACTION +-- +goose up +CREATE UNIQUE INDEX CONCURRENTLY feedback_records_id_tenant_uidx + ON feedback_records (id, tenant_id); + +-- +goose down +DROP INDEX CONCURRENTLY IF EXISTS feedback_records_id_tenant_uidx; diff --git a/migrations/009_taxonomy_schema.sql b/migrations/010_taxonomy_schema.sql similarity index 98% rename from migrations/009_taxonomy_schema.sql rename to migrations/010_taxonomy_schema.sql index 8e00130..9796ea4 100644 --- a/migrations/009_taxonomy_schema.sql +++ b/migrations/010_taxonomy_schema.sql @@ -2,7 +2,8 @@ -- Taxonomy generation artifacts are stored as run-scoped Hub data. Keep them -- separate from feedback_records so repeated generation never mutates source feedback. ALTER TABLE feedback_records - ADD CONSTRAINT feedback_records_id_tenant_unique UNIQUE (id, tenant_id); + ADD CONSTRAINT feedback_records_id_tenant_unique + UNIQUE USING INDEX feedback_records_id_tenant_uidx; CREATE TYPE taxonomy_run_status_enum AS ENUM ( 'pending',