From f5b4b53dfca750b717a050cb8d5c2995e81fa4e2 Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine
Date: Wed, 11 Sep 2024 17:19:58 +0200
Subject: [PATCH 1/8] Review pgcopydb stream sentinel CLI.

Add new options to `pgcopydb stream sentinel get` that allow fetching a
single LSN value, avoiding the need to use `awk`, `jq`, or a similar tool
to parse a single entry out of the command output.
---
 docs/include/stream-sentinel-get.rst        |  1 +
 docs/include/stream-sentinel-set-endpos.rst |  1 -
 src/bin/pgcopydb/cli_sentinel.c             | 12 +++++++++++-
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/docs/include/stream-sentinel-get.rst b/docs/include/stream-sentinel-get.rst
index 2e85d448c..ab52c10df 100644
--- a/docs/include/stream-sentinel-get.rst
+++ b/docs/include/stream-sentinel-get.rst
@@ -9,5 +9,6 @@
   --apply          Get only the apply value
   --write-lsn      Get only the write LSN value
   --flush-lsn      Get only the flush LSN value
+  --transform-lsn  Get only the transform LSN value
   --replay-lsn     Get only the replay LSN value

diff --git a/docs/include/stream-sentinel-set-endpos.rst b/docs/include/stream-sentinel-set-endpos.rst
index 2d0b86485..5debd4c71 100644
--- a/docs/include/stream-sentinel-set-endpos.rst
+++ b/docs/include/stream-sentinel-set-endpos.rst
@@ -3,6 +3,5 @@ pgcopydb stream sentinel set endpos: Set the sentinel end position LSN
 usage: pgcopydb stream sentinel set endpos  [ --source ... ] [ | --current ]

-  --source      Postgres URI to the source database
   --current     Use pg_current_wal_flush_lsn() as the endpos

diff --git a/src/bin/pgcopydb/cli_sentinel.c b/src/bin/pgcopydb/cli_sentinel.c
index e515a9459..8fef8f8e1 100644
--- a/src/bin/pgcopydb/cli_sentinel.c
+++ b/src/bin/pgcopydb/cli_sentinel.c
@@ -50,6 +50,7 @@ CommandLine sentinel_get_command =
 	"  --apply          Get only the apply value\n"
 	"  --write-lsn      Get only the write LSN value\n"
 	"  --flush-lsn      Get only the flush LSN value\n"
+	"  --transform-lsn  Get only the transform LSN value\n"
 	"  --replay-lsn     Get only the replay LSN value\n",
 	cli_sentinel_getopts,
 	cli_sentinel_get);
@@ -127,6 +128,7 @@ cli_sentinel_getopts(int argc, char **argv)
 		{ "endpos", no_argument, NULL, 'e' },
 		{ "apply", no_argument, NULL, 'a' },
 		{ "write-lsn", no_argument, NULL, 'w' },
+		{ "transform-lsn", no_argument, NULL, 't' },
 		{ "flush-lsn", no_argument, NULL, 'f' },
 		{ "replay-lsn", no_argument, NULL, 'r' },
 		{ "current", no_argument, NULL, 'C' },
@@ -209,6 +211,14 @@ cli_sentinel_getopts(int argc, char **argv)
 			break;
 		}

+		case 't':
+		{
+			++sentinelOptionsCount;
+			options.sentinelOptions.transformLSN = true;
+			log_trace("--transform-lsn");
+			break;
+		}
+
 		case 'f':
 		{
 			++sentinelOptionsCount;
@@ -316,7 +326,7 @@ cli_sentinel_getopts(int argc, char **argv)
 	if (sentinelOptionsCount > 1)
 	{
 		log_fatal("Please choose only one of --startpos --endpos --apply "
-				  "--write-lsn --flush-lsn --replay-lsn");
+				  "--write-lsn --transform-lsn --flush-lsn --replay-lsn");
 		++errors;
 	}

From 0a8572e9492daf189f63b03bc75f5c9859c977aa Mon Sep 17 00:00:00 2001
From: Dimitri Fontaine
Date: Wed, 11 Sep 2024 17:36:53 +0200
Subject: [PATCH 2/8] Fix docker-compose volume sharing between services.

In unit tests that use both an inject and a test service, the inject
service needs access to the same pgcopydb work directory as the test
service. For that we use an external docker volume. The previous
implementation of that idea was wrong; this patch fixes it by properly
using the docker APIs for shared "external" volumes.
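For reference, here is a minimal sketch of the shared external-volume
pattern this relies on, using the volume name, image name, and mount
point from the cdc-wal2json test; the explicit `docker volume create`
step is an assumption made for illustration, the test harness may
provision the volume differently:

    # compose declares the volume with "external: true", so it must
    # already exist: compose does not create external volumes itself
    docker volume create cdc-wal2json

    # the test service mounts it through compose.yaml
    # (volumes: - cdc-wal2json:/var/run/pgcopydb)
    docker compose run test

    # the inject/cleanup containers attach the very same volume with a
    # plain docker run, so both sides share one pgcopydb work directory
    docker run --rm -v cdc-wal2json:/var/run/pgcopydb cdc-wal2json \
        make -f /var/lib/postgres/cleanup.mk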
--- tests/cdc-wal2json/Makefile | 17 ++++++++++++++-- tests/cdc-wal2json/compose.yaml | 35 ++++++++++++++++++++------------- tests/cdc-wal2json/copydb.sh | 4 ++-- tests/follow-wal2json/Makefile | 6 +++--- tests/follow-wal2json/paths.env | 2 ++ tests/follow-wal2json/uris.env | 3 +++ 6 files changed, 46 insertions(+), 21 deletions(-) create mode 100644 tests/follow-wal2json/paths.env create mode 100644 tests/follow-wal2json/uris.env diff --git a/tests/cdc-wal2json/Makefile b/tests/cdc-wal2json/Makefile index f614123fb..72c9e0f42 100644 --- a/tests/cdc-wal2json/Makefile +++ b/tests/cdc-wal2json/Makefile @@ -8,7 +8,7 @@ test: down run down ; up: down build docker compose up $(COMPOSE_EXIT) -run: build +run: build fix-volumes docker compose run test down: @@ -17,4 +17,17 @@ down: build: docker compose build --quiet -.PHONY: run down build test +VPATH = /var/run/pgcopydb +CNAME = cdc-wal2json +VNAME = cdc-wal2json +VOLUMES = -v $(VNAME):$(VPATH) +OPTS = --env-file=../paths.env $(VOLUMES) +CLEANUP = make -f /var/lib/postgres/cleanup.mk + +fix-volumes: + docker run --rm $(OPTS) $(CNAME) $(CLEANUP) + +attach: + docker run --rm -it $(OPTS) $(CNAME) bash + +.PHONY: run down build test fix-volumes attach diff --git a/tests/cdc-wal2json/compose.yaml b/tests/cdc-wal2json/compose.yaml index 5ae9c10d2..72b82e50b 100644 --- a/tests/cdc-wal2json/compose.yaml +++ b/tests/cdc-wal2json/compose.yaml @@ -5,38 +5,45 @@ services: dockerfile: Dockerfile.pg expose: - 5432 - environment: - POSTGRES_USER: postgres - POSTGRES_PASSWORD: h4ckm3 - POSTGRES_HOST_AUTH_METHOD: trust + env_file: + - ../postgres.env command: > -c wal_level=logical -c ssl=on -c ssl_cert_file=/etc/ssl/certs/ssl-cert-snakeoil.pem -c ssl_key_file=/etc/ssl/private/ssl-cert-snakeoil.key + target: image: postgres:13-bullseye expose: - 5432 - environment: - POSTGRES_USER: postgres - POSTGRES_PASSWORD: h4ckm3 - POSTGRES_HOST_AUTH_METHOD: trust + env_file: + - ../postgres.env command: > -c ssl=on -c ssl_cert_file=/etc/ssl/certs/ssl-cert-snakeoil.pem -c ssl_key_file=/etc/ssl/private/ssl-cert-snakeoil.key + test: - build: - context: . - dockerfile: Dockerfile + image: cdc-wal2json + build: . + cap_add: + - SYS_ADMIN + - SYS_PTRACE environment: - PGSSLMODE: "require" - PGCOPYDB_SOURCE_PGURI: postgres://postgres:h4ckm3@source/postgres - PGCOPYDB_TARGET_PGURI: postgres://postgres:h4ckm3@target/postgres PGCOPYDB_TABLE_JOBS: 4 PGCOPYDB_INDEX_JOBS: 2 PGCOPYDB_OUTPUT_PLUGIN: wal2json + env_file: + - ../uris.env + - ../paths.env + # share TMPDIR between inject and test services + volumes: + - cdc-wal2json:/var/run/pgcopydb depends_on: - source - target + +volumes: + cdc-wal2json: + external: true diff --git a/tests/cdc-wal2json/copydb.sh b/tests/cdc-wal2json/copydb.sh index 7ddb17492..a3e6cea0c 100755 --- a/tests/cdc-wal2json/copydb.sh +++ b/tests/cdc-wal2json/copydb.sh @@ -45,7 +45,7 @@ lsn=`psql -At -d ${PGCOPYDB_SOURCE_PGURI} -c 'select pg_current_wal_lsn()'` # and prefetch the changes captured in our replication slot pgcopydb stream prefetch --resume --endpos "${lsn}" -vv -SHAREDIR=/var/lib/postgres/.local/share/pgcopydb +SHAREDIR=${XDG_DATA_HOME}/pgcopydb WALFILE=000000010000000000000002.json SQLFILE=000000010000000000000002.sql @@ -54,7 +54,7 @@ SQLFILE=000000010000000000000002.sql expected=/tmp/expected.json result=/tmp/result.json -JQSCRIPT='del(.lsn) | del(.nextlsn) | del(.timestamp) | del(.xid) | if has("message") then .message |= sub("(?COMMIT|BEGIN) [0-9]+"; "\(.m) XXX") else . 
end' +JQSCRIPT='del(.lsn) | del(.nextlsn) | del(.timestamp) | del(.xid)' jq "${JQSCRIPT}" /usr/src/pgcopydb/${WALFILE} > ${expected} jq "${JQSCRIPT}" ${SHAREDIR}/${WALFILE} > ${result} diff --git a/tests/follow-wal2json/Makefile b/tests/follow-wal2json/Makefile index 16bbb9374..b2d46dd60 100644 --- a/tests/follow-wal2json/Makefile +++ b/tests/follow-wal2json/Makefile @@ -9,10 +9,10 @@ up: down build docker compose up $(COMPOSE_EXIT) run: build fix-volumes - docker compose run test + docker-compose run test down: - docker compose down + docker-compose down --volumes --remove-orphans build: docker compose build --quiet @@ -30,4 +30,4 @@ fix-volumes: attach: docker run --rm -it $(OPTS) $(CNAME) bash -.PHONY: run down build test fix-volumes +.PHONY: run down build test fix-volumes attach diff --git a/tests/follow-wal2json/paths.env b/tests/follow-wal2json/paths.env new file mode 100644 index 000000000..3694e1204 --- /dev/null +++ b/tests/follow-wal2json/paths.env @@ -0,0 +1,2 @@ +TMPDIR=/var/run/pgcopydb +XDG_DATA_HOME=/var/run/pgcopydb/cdc diff --git a/tests/follow-wal2json/uris.env b/tests/follow-wal2json/uris.env new file mode 100644 index 000000000..dad5cf45c --- /dev/null +++ b/tests/follow-wal2json/uris.env @@ -0,0 +1,3 @@ +PGSSLMODE="require" +PGCOPYDB_SOURCE_PGURI="postgres://postgres:h4ckm3@source/postgres" +PGCOPYDB_TARGET_PGURI="postgres://postgres:h4ckm3@target/postgres" From 64aecdad41b6e703985cd81f0d4f81f8f277a77a Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Wed, 11 Sep 2024 17:40:10 +0200 Subject: [PATCH 3/8] {WIP} Use SQLite as the file format for CDC streaming. --- src/bin/pgcopydb/catalog.c | 72 ++- src/bin/pgcopydb/catalog.h | 1 + src/bin/pgcopydb/cli_clone_follow.c | 2 + src/bin/pgcopydb/cli_snapshot.c | 1 + src/bin/pgcopydb/cli_stream.c | 30 +- src/bin/pgcopydb/copydb.c | 12 +- src/bin/pgcopydb/copydb.h | 10 +- src/bin/pgcopydb/follow.c | 36 -- src/bin/pgcopydb/ld_apply.c | 25 +- src/bin/pgcopydb/ld_store.c | 446 +++++++++++++++ src/bin/pgcopydb/ld_stream.c | 808 +++++----------------------- src/bin/pgcopydb/ld_stream.h | 78 +-- src/bin/pgcopydb/ld_transform.c | 37 +- src/bin/pgcopydb/pgsql.c | 92 ---- src/bin/pgcopydb/pgsql.h | 8 - src/bin/pgcopydb/schema.h | 4 +- src/bin/pgcopydb/sentinel.c | 115 +++- src/bin/pgcopydb/snapshot.c | 22 + 18 files changed, 866 insertions(+), 933 deletions(-) create mode 100644 src/bin/pgcopydb/ld_store.c diff --git a/src/bin/pgcopydb/catalog.c b/src/bin/pgcopydb/catalog.c index 048b93ec6..b1be98e3b 100644 --- a/src/bin/pgcopydb/catalog.c +++ b/src/bin/pgcopydb/catalog.c @@ -186,7 +186,14 @@ static char *sourceDBcreateDDLs[] = { "create table sentinel(" " id integer primary key check (id = 1), " " startpos pg_lsn, endpos pg_lsn, apply bool, " - " write_lsn pg_lsn, flush_lsn pg_lsn, replay_lsn pg_lsn)", + " write_lsn pg_lsn, flush_lsn pg_lsn, " + " transform_lsn pg_lsn, " + " replay_lsn pg_lsn)", + + "create table cdc_files(" + " id integer primary key, filename text unique, timeline integer, " + " startpos pg_lsn, endpos pg_lsn, " + " start_time_epoch integer, done_time_epoch integer)", "create table timeline_history(" " tli integer primary key, startpos pg_lsn, endpos pg_lsn)" @@ -413,6 +420,28 @@ static char *targetDBcreateDDLs[] = { }; +static char *replayDBcreateDDLs[] = { + "create table output(" + " id integer primary key, " + " action text, xid integer, lsn pg_lsn, timestamp text, " + " message text)", + + "create unique index o_a_lsn on output(action, lsn)", + "create unique index o_a_xid on output(action, xid)", + + "create 
table stmt(hash text primary key, sql text)", + "create unique index stmt_hash on stmt(hash)", + + "create table replay(" + " id integer primary key, " + " action text, xid integer, lsn pg_lsn, timestamp text, " + " stmt_hash text references stmt(hash), stmt_args jsonb)", + + "create index r_xid on replay(xid)", + "create index r_lsn on replay(lsn)", +}; + + static char *sourceDBdropDDLs[] = { "drop table if exists setup", "drop table if exists section", @@ -480,6 +509,12 @@ static char *targetDBdropDDLs[] = { }; +static char *replayDBdropDDLs[] = { + "drop table if exists output", + "drop table if exists replay" +}; + + /* * catalog_init_from_specs initializes our internal catalog database file from * a specification. @@ -952,6 +987,13 @@ catalog_create_schema(DatabaseCatalog *catalog) break; } + case DATABASE_CATALOG_TYPE_REPLAY: + { + createDDLs = replayDBcreateDDLs; + count = sizeof(replayDBcreateDDLs) / sizeof(replayDBcreateDDLs[0]); + break; + } + default: { log_error("BUG: called catalog_init for unknown type %d", @@ -1012,6 +1054,13 @@ catalog_drop_schema(DatabaseCatalog *catalog) break; } + case DATABASE_CATALOG_TYPE_REPLAY: + { + dropDDLs = replayDBdropDDLs; + count = sizeof(replayDBdropDDLs) / sizeof(replayDBdropDDLs[0]); + break; + } + default: { log_error("BUG: called catalog_drop_schema for unknown type %d", @@ -8269,6 +8318,27 @@ catalog_bind_parameters(sqlite3 *db, switch (p->type) { + case BIND_PARAMETER_TYPE_NULL: + { + int rc = sqlite3_bind_null(ppStmt, n); + + if (rc != SQLITE_OK) + { + log_error("[SQLite %d] Failed to bind \"%s\" to NULL: %s", + rc, + p->name, + sqlite3_errstr(rc)); + return false; + } + + if (logSQL) + { + appendPQExpBuffer(debugParameters, "%s", "null"); + } + + break; + } + case BIND_PARAMETER_TYPE_INT: { int rc = sqlite3_bind_int(ppStmt, n, p->intVal); diff --git a/src/bin/pgcopydb/catalog.h b/src/bin/pgcopydb/catalog.h index 21f9d5f0f..c17a95475 100644 --- a/src/bin/pgcopydb/catalog.h +++ b/src/bin/pgcopydb/catalog.h @@ -648,6 +648,7 @@ bool catalog_timeline_history_fetch(SQLiteQuery *query); typedef enum { BIND_PARAMETER_TYPE_UNKNOWN = 0, + BIND_PARAMETER_TYPE_NULL, BIND_PARAMETER_TYPE_INT, BIND_PARAMETER_TYPE_INT64, BIND_PARAMETER_TYPE_TEXT diff --git a/src/bin/pgcopydb/cli_clone_follow.c b/src/bin/pgcopydb/cli_clone_follow.c index b6f3e0ec4..c8e14c301 100644 --- a/src/bin/pgcopydb/cli_clone_follow.c +++ b/src/bin/pgcopydb/cli_clone_follow.c @@ -210,6 +210,7 @@ clone_and_follow(CopyDataSpec *copySpecs) copyDBoptions.endpos, STREAM_MODE_CATCHUP, &(copySpecs->catalogs.source), + &(copySpecs->catalogs.replay), copyDBoptions.stdIn, copyDBoptions.stdOut, logSQL)) @@ -388,6 +389,7 @@ cli_follow(int argc, char **argv) copyDBoptions.endpos, STREAM_MODE_CATCHUP, &(copySpecs.catalogs.source), + &(copySpecs.catalogs.replay), copyDBoptions.stdIn, copyDBoptions.stdOut, logSQL)) diff --git a/src/bin/pgcopydb/cli_snapshot.c b/src/bin/pgcopydb/cli_snapshot.c index f1fd0351b..c89b56fa4 100644 --- a/src/bin/pgcopydb/cli_snapshot.c +++ b/src/bin/pgcopydb/cli_snapshot.c @@ -323,6 +323,7 @@ cli_create_snapshot(int argc, char **argv) createSNoptions.endpos, STREAM_MODE_CATCHUP, &(copySpecs.catalogs.source), + &(copySpecs.catalogs.replay), createSNoptions.stdIn, createSNoptions.stdOut, logSQL)) diff --git a/src/bin/pgcopydb/cli_stream.c b/src/bin/pgcopydb/cli_stream.c index acc16f630..6044fc34f 100644 --- a/src/bin/pgcopydb/cli_stream.c +++ b/src/bin/pgcopydb/cli_stream.c @@ -582,6 +582,7 @@ cli_stream_setup(int argc, char **argv) streamDBoptions.endpos, 
STREAM_MODE_CATCHUP, &(copySpecs.catalogs.source), + &(copySpecs.catalogs.replay), streamDBoptions.stdIn, streamDBoptions.stdOut, logSQL)) @@ -709,6 +710,7 @@ cli_stream_catchup(int argc, char **argv) streamDBoptions.endpos, STREAM_MODE_CATCHUP, &(copySpecs.catalogs.source), + &(copySpecs.catalogs.replay), streamDBoptions.stdIn, streamDBoptions.stdOut, logSQL)) @@ -792,6 +794,7 @@ cli_stream_replay(int argc, char **argv) streamDBoptions.endpos, STREAM_MODE_REPLAY, &(copySpecs.catalogs.source), + &(copySpecs.catalogs.replay), true, /* stdin */ true, /* stdout */ logSQL)) @@ -800,17 +803,6 @@ cli_stream_replay(int argc, char **argv) exit(EXIT_CODE_INTERNAL_ERROR); } - /* - * Remove the possibly still existing stream context files from - * previous round of operations (--resume, etc). We want to make sure - * that the catchup process reads the files created on this connection. - */ - if (!stream_cleanup_context(&specs)) - { - /* errors have already been logged */ - exit(EXIT_CODE_INTERNAL_ERROR); - } - /* * Before starting the receive, transform, and apply sub-processes, we need * to set the sentinel endpos to the command line --endpos option, when @@ -917,6 +909,7 @@ cli_stream_transform(int argc, char **argv) streamDBoptions.endpos, STREAM_MODE_CATCHUP, &(copySpecs.catalogs.source), + &(copySpecs.catalogs.replay), streamDBoptions.stdIn, streamDBoptions.stdOut, logSQL)) @@ -1079,6 +1072,7 @@ cli_stream_apply(int argc, char **argv) streamDBoptions.endpos, STREAM_MODE_CATCHUP, &(copySpecs.catalogs.source), + &(copySpecs.catalogs.replay), true, /* streamDBoptions.stdIn */ false, /* streamDBoptions.stdOut */ logSQL)) @@ -1102,6 +1096,7 @@ cli_stream_apply(int argc, char **argv) if (!stream_apply_init_context(&context, &(copySpecs.catalogs.source), + &(copySpecs.catalogs.replay), &(copySpecs.cfPaths.cdc), &(streamDBoptions.connStrings), streamDBoptions.origin, @@ -1190,6 +1185,7 @@ stream_start_in_mode(LogicalStreamMode mode) streamDBoptions.endpos, mode, &(copySpecs.catalogs.source), + &(copySpecs.catalogs.replay), streamDBoptions.stdIn, streamDBoptions.stdOut, logSQL)) @@ -1214,18 +1210,6 @@ stream_start_in_mode(LogicalStreamMode mode) case STREAM_MODE_PREFETCH: { - /* - * Remove the possibly still existing stream context files from - * previous round of operations (--resume, etc). We want to make - * sure that the catchup process reads the files created on this - * connection. 
- */ - if (!stream_cleanup_context(&specs)) - { - /* errors have already been logged */ - exit(EXIT_CODE_INTERNAL_ERROR); - } - if (!followDB(©Specs, &specs)) { /* errors have already been logged */ diff --git a/src/bin/pgcopydb/copydb.c b/src/bin/pgcopydb/copydb.c index d730b03fa..e98682462 100644 --- a/src/bin/pgcopydb/copydb.c +++ b/src/bin/pgcopydb/copydb.c @@ -561,21 +561,25 @@ copydb_init_specs(CopyDataSpec *specs, DatabaseCatalog *source = &(specs->catalogs.source); DatabaseCatalog *filter = &(specs->catalogs.filter); DatabaseCatalog *target = &(specs->catalogs.target); + DatabaseCatalog *replay = &(specs->catalogs.replay); /* init the catalog type */ source->type = DATABASE_CATALOG_TYPE_SOURCE; filter->type = DATABASE_CATALOG_TYPE_FILTER; target->type = DATABASE_CATALOG_TYPE_TARGET; + replay->type = DATABASE_CATALOG_TYPE_REPLAY; /* pick the dbfile from the specs */ strlcpy(source->dbfile, specs->cfPaths.sdbfile, sizeof(source->dbfile)); strlcpy(filter->dbfile, specs->cfPaths.fdbfile, sizeof(filter->dbfile)); strlcpy(target->dbfile, specs->cfPaths.tdbfile, sizeof(target->dbfile)); - bool shouldCreateVacuumQueue = (specs->section == DATA_SECTION_ALL || - specs->section == DATA_SECTION_INDEXES || - specs->section == DATA_SECTION_TABLE_DATA) && - !specs->skipVacuum; + bool shouldCreateVacuumQueue = + (specs->section == DATA_SECTION_ALL || + specs->section == DATA_SECTION_INDEXES || + specs->section == DATA_SECTION_TABLE_DATA) && + !specs->skipVacuum; + if (shouldCreateVacuumQueue) { if (!queue_create(&(specs->vacuumQueue), "vacuum")) diff --git a/src/bin/pgcopydb/copydb.h b/src/bin/pgcopydb/copydb.h index 65d0c8d88..b14e7afbd 100644 --- a/src/bin/pgcopydb/copydb.h +++ b/src/bin/pgcopydb/copydb.h @@ -175,8 +175,8 @@ typedef struct ExtensionReqs /* * pgcopydb sentinel is a table that's created on the source catalog and allows - * communicating elements from the outside, and in between the receive and - * apply processes. + * communicating elements from the outside, and in between the receive, + * transform and apply processes. */ typedef struct CopyDBSentinel { @@ -184,6 +184,7 @@ typedef struct CopyDBSentinel uint64_t startpos; uint64_t endpos; uint64_t write_lsn; + uint64_t transform_lsn; uint64_t flush_lsn; uint64_t replay_lsn; } CopyDBSentinel; @@ -498,6 +499,7 @@ bool sentinel_update_write_flush_lsn(DatabaseCatalog *catalog, uint64_t write_lsn, uint64_t flush_lsn); +bool sentinel_update_transform_lsn(DatabaseCatalog *catalog, uint64_t transform_lsn); bool sentinel_update_replay_lsn(DatabaseCatalog *catalog, uint64_t replay_lsn); bool sentinel_get(DatabaseCatalog *catalog, CopyDBSentinel *sentinel); @@ -508,6 +510,10 @@ bool sentinel_sync_recv(DatabaseCatalog *catalog, uint64_t flush_lsn, CopyDBSentinel *sentinel); +bool sentinel_sync_transform(DatabaseCatalog *catalog, + uint64_t transform_lsn, + CopyDBSentinel *sentinel); + bool sentinel_sync_apply(DatabaseCatalog *catalog, uint64_t replay_lsn, CopyDBSentinel *sentinel); diff --git a/src/bin/pgcopydb/follow.c b/src/bin/pgcopydb/follow.c index c2ad996ae..4a8f2a801 100644 --- a/src/bin/pgcopydb/follow.c +++ b/src/bin/pgcopydb/follow.c @@ -251,18 +251,6 @@ follow_get_sentinel(StreamSpecs *specs, CopyDBSentinel *sentinel, bool verbose) bool follow_main_loop(CopyDataSpec *copySpecs, StreamSpecs *streamSpecs) { - /* - * Remove the possibly still existing stream context files from - * previous round of operations (--resume, etc). We want to make - * sure that the catchup process reads the files created on this - * connection. 
- */ - if (!stream_cleanup_context(streamSpecs)) - { - /* errors have already been logged */ - return false; - } - DatabaseCatalog *sourceDB = &(copySpecs->catalogs.source); if (!catalog_open(sourceDB)) @@ -417,17 +405,6 @@ follow_prepare_mode_switch(StreamSpecs *streamSpecs, { log_info("Catching-up from existing on-disk files"); - if (streamSpecs->system.timeline == 0) - { - if (!stream_read_context(streamSpecs)) - { - log_error("Failed to read the streaming context information " - "from the source database and internal catalogs, " - "see above for details"); - return false; - } - } - /* * If the previous mode was catch-up, then before proceeding, we might need * to empty the transform queue where the STOP message was sent. @@ -517,19 +494,6 @@ followDB(CopyDataSpec *copySpecs, StreamSpecs *streamSpecs) return false; } - /* - * Before starting sub-processes, clean-up intermediate files from previous - * round. Here that's the stream context with WAL segment size and timeline - * history, which are fetched from the source server to compute WAL file - * names. The current timeline can only change at a server restart or a - * failover, both with trigger a reconnect. - */ - if (!stream_cleanup_context(streamSpecs)) - { - /* errors have already been logged */ - return false; - } - /* * Before starting sub-processes, make sure to close our SQLite catalogs. * We open the SQLite catalogs again before returning from this function diff --git a/src/bin/pgcopydb/ld_apply.c b/src/bin/pgcopydb/ld_apply.c index 9194e0700..88ff4f4c1 100644 --- a/src/bin/pgcopydb/ld_apply.c +++ b/src/bin/pgcopydb/ld_apply.c @@ -184,6 +184,7 @@ stream_apply_setup(StreamSpecs *specs, StreamApplyContext *context) /* init our context */ if (!stream_apply_init_context(context, specs->sourceDB, + specs->replayDB, &(specs->paths), specs->connStrings, specs->origin, @@ -208,23 +209,12 @@ stream_apply_setup(StreamSpecs *specs, StreamApplyContext *context) return true; } - if (specs->system.timeline == 0) + if (!ld_store_open_replaydb(specs)) { - if (!stream_read_context(specs)) - { - log_error("Failed to read the streaming context information " - "from the source database and internal catalogs, " - "see above for details"); - return false; - } + /* errors have already been logged */ + return false; } - context->system = specs->system; - context->WalSegSz = specs->WalSegSz; - - log_debug("Source database wal_segment_size is %u", context->WalSegSz); - log_debug("Source database timeline is %d", context->system.timeline); - /* * Use the replication origin for our setup (context->previousLSN). */ @@ -1358,6 +1348,7 @@ setupReplicationOrigin(StreamApplyContext *context) bool stream_apply_init_context(StreamApplyContext *context, DatabaseCatalog *sourceDB, + DatabaseCatalog *replayDB, CDCPaths *paths, ConnStrings *connStrings, char *origin, @@ -1395,6 +1386,12 @@ stream_apply_init_context(StreamApplyContext *context, strlcpy(context->origin, origin, sizeof(context->origin)); + /* + * TODO: get rid of WalSegSz entirely. In the meantime, have it set to a + * fixed value as in the old Postgres versions. 
+ */ + context->WalSegSz = 16 * 1024 * 1024; + return true; } diff --git a/src/bin/pgcopydb/ld_store.c b/src/bin/pgcopydb/ld_store.c new file mode 100644 index 000000000..5621f53de --- /dev/null +++ b/src/bin/pgcopydb/ld_store.c @@ -0,0 +1,446 @@ +/* + * src/bin/pgcopydb/ld_store.c + * Implementation of a CLI to copy a database between two Postgres instances + */ + +#include +#include +#include +#include +#include + +#include "parson.h" + +#include "copydb.h" +#include "ld_stream.h" +#include "lock_utils.h" +#include "log.h" +#include "parsing_utils.h" +#include "pg_utils.h" +#include "schema.h" +#include "signals.h" +#include "string_utils.h" + + +/* + * ld_store_open_replaydb opens the current replaydb file if it already exists, + * or create a new replaydb SQLite file for processing the streaming data. + */ +bool +ld_store_open_replaydb(StreamSpecs *specs) +{ + StreamContext *privateContext = &(specs->private); + DatabaseCatalog *replayDB = specs->replayDB; + + if (!ld_store_current_filename(specs)) + { + /* errors have already been logged */ + return false; + } + + bool createReplayDB = IS_EMPTY_STRING_BUFFER(replayDB->dbfile); + + /* if we don't have a replayDB filename yet, it's time to create it */ + if (IS_EMPTY_STRING_BUFFER(replayDB->dbfile)) + { + if (privateContext->timeline == 0) + { + log_error("BUG: ld_store_open_replaydb: timeline is zero"); + return false; + } + + sformat(replayDB->dbfile, MAXPGPATH, "%s/%08d-%08X-%08X.db", + specs->paths.dir, + privateContext->timeline, + LSN_FORMAT_ARGS(privateContext->startpos)); + } + + log_info("%s CDC file \"%s\"", + createReplayDB ? "Creating" : "Opening", + replayDB->dbfile); + + /* now open the replaydb */ + if (!catalog_init(replayDB)) + { + log_error("Failed to open the current replay database \"%s\", " + "see above for details", + replayDB->dbfile); + return false; + } + + if (createReplayDB) + { + if (!ld_store_insert_cdc_filename(specs)) + { + log_error("Failed to register the current replay database \"%s\", " + "see above for details", + replayDB->dbfile); + return false; + } + } + + return true; +} + + +/* + * ld_store_current_filename queries the sourceDB SQLite catalog for an open + * file for the current timeline. 
+ */ +bool +ld_store_current_filename(StreamSpecs *specs) +{ + StreamContext *privateContext = &(specs->private); + + sqlite3 *db = specs->sourceDB->db; + + if (db == NULL) + { + log_error("BUG: ld_store_current_filename: db is NULL"); + return false; + } + + char *sql = + " select filename " + " from cdc_files " + " where done_time_epoch is null " + " and startpos <= $1 " + " and (endpos is null or $2 <= endpos) " + " and case when $3 > 0 then timeline = $4 end " + "order by timeline desc " + " limit 1"; + + SQLiteQuery query = { + .context = specs->replayDB->dbfile, + .fetchFunction = &ld_store_current_filename_fetch + }; + + if (!catalog_sql_prepare(db, sql, &query)) + { + /* errors have already been logged */ + return false; + } + + uint64_t startpos = privateContext->startpos; + uint64_t endpos = privateContext->endpos; + + char slsn[PG_LSN_MAXLENGTH] = { 0 }; + char elsn[PG_LSN_MAXLENGTH] = { 0 }; + + sformat(slsn, sizeof(slsn), "%X/%X", LSN_FORMAT_ARGS(startpos)); + sformat(elsn, sizeof(elsn), "%X/%X", LSN_FORMAT_ARGS(endpos)); + + /* bind our parameters now */ + BindParam params[] = { + { BIND_PARAMETER_TYPE_TEXT, "startpos", 0, slsn }, + { BIND_PARAMETER_TYPE_TEXT, "endpos", 0, elsn }, + { BIND_PARAMETER_TYPE_INT, "timeline", privateContext->timeline, 0 }, + { BIND_PARAMETER_TYPE_INT, "timeline", privateContext->timeline, 0 }, + }; + + int count = sizeof(params) / sizeof(params[0]); + + if (!catalog_sql_bind(&query, params, count)) + { + /* errors have already been logged */ + return false; + } + + /* now execute the query, which does not return any row */ + if (!catalog_sql_execute_once(&query)) + { + /* errors have already been logged */ + return false; + } + + return true; +} + + +/* + * ld_store_current_filename_fetch is a SQLiteQuery callback. + */ +bool +ld_store_current_filename_fetch(SQLiteQuery *query) +{ + char *filename = (char *) query->context; + + if (sqlite3_column_type(query->ppStmt, 0) == SQLITE_NULL) + { + query->context = NULL; + } + else + { + strlcpy(filename, + (char *) sqlite3_column_text(query->ppStmt, 0), + MAXPGPATH); + + log_info("ld_store_current_filename_fetch: \"%s\"", filename); + } + + return true; +} + + +/* + * ld_store_insert_cdc_filename inserts a new entry to the streaming + * table of replaydb filename with metadata. 
+ */ +bool +ld_store_insert_cdc_filename(StreamSpecs *specs) +{ + StreamContext *privateContext = &(specs->private); + + sqlite3 *db = specs->sourceDB->db; + + if (db == NULL) + { + log_error("BUG: ld_store_insert_cdc_filename: db is NULL"); + return false; + } + + char *sql = + "insert into cdc_files(filename, timeline, startpos, start_time_epoch)" + "values($1, $2, $3, $4)"; + + SQLiteQuery query = { 0 }; + + if (!catalog_sql_prepare(db, sql, &query)) + { + /* errors have already been logged */ + return false; + } + + char lsn[PG_LSN_MAXLENGTH] = { 0 }; + sformat(lsn, sizeof(lsn), "%X/%X", LSN_FORMAT_ARGS(privateContext->startpos)); + + uint64_t startTime = time(NULL); + + /* bind our parameters now */ + BindParam params[] = { + { BIND_PARAMETER_TYPE_TEXT, "filename", 0, specs->replayDB->dbfile }, + { BIND_PARAMETER_TYPE_INT, "timeline", privateContext->timeline, NULL }, + { BIND_PARAMETER_TYPE_TEXT, "startpos", 0, lsn }, + { BIND_PARAMETER_TYPE_INT64, "start_time_epoch", startTime, NULL } + }; + + int count = sizeof(params) / sizeof(params[0]); + + if (!catalog_sql_bind(&query, params, count)) + { + /* errors have already been logged */ + return false; + } + + /* now execute the query, which does not return any row */ + if (!catalog_sql_execute_once(&query)) + { + /* errors have already been logged */ + return false; + } + + return true; +} + + +/* + * ld_store_insert_timeline_history inserts a timeline history entry to our + * SQLite catalogs. + */ +bool +ld_store_insert_timeline_history(DatabaseCatalog *catalog, + uint32_t tli, + uint64_t startpos, + uint64_t endpos) +{ + sqlite3 *db = catalog->db; + + if (db == NULL) + { + log_error("BUG: ld_store_insert_timeline_history: db is NULL"); + return false; + } + + char *sql = + "insert or replace into timeline_history(tli, startpos, endpos)" + "values($1, $2, $3)"; + + SQLiteQuery query = { 0 }; + + if (!catalog_sql_prepare(db, sql, &query)) + { + /* errors have already been logged */ + return false; + } + + char slsn[PG_LSN_MAXLENGTH] = { 0 }; + char elsn[PG_LSN_MAXLENGTH] = { 0 }; + + sformat(slsn, sizeof(slsn), "%X/%X", LSN_FORMAT_ARGS(startpos)); + sformat(elsn, sizeof(elsn), "%X/%X", LSN_FORMAT_ARGS(endpos)); + + /* bind our parameters now */ + BindParam params[] = { + { BIND_PARAMETER_TYPE_INT, "tli", tli, NULL }, + { BIND_PARAMETER_TYPE_TEXT, "startpos", 0, slsn }, + { BIND_PARAMETER_TYPE_TEXT, "endpos", 0, elsn } + }; + + int count = sizeof(params) / sizeof(params[0]); + + if (!catalog_sql_bind(&query, params, count)) + { + /* errors have already been logged */ + return false; + } + + /* now execute the query, which does not return any row */ + if (!catalog_sql_execute_once(&query)) + { + /* errors have already been logged */ + return false; + } + + return true; +} + + +/* + * ld_store_insert_message inserts a logical decoding output plugin message + * into our SQLite file format. + */ +bool +ld_store_insert_message(DatabaseCatalog *catalog, + LogicalMessageMetadata *metadata) +{ + sqlite3 *db = catalog->db; + + if (db == NULL) + { + log_error("BUG: ld_store_insert_message: db is NULL"); + return false; + } + + char *sql = + "insert or replace into output(action, xid, lsn, timestamp, message)" + "values($1, $2, $3, $4, $5)"; + + SQLiteQuery query = { 0 }; + + if (!catalog_sql_prepare(db, sql, &query)) + { + /* errors have already been logged */ + return false; + } + + /* only BEGIN/COMMIT messages have an xid */ + BindParameterType xidParamType = + metadata->xid == 0 ? 
BIND_PARAMETER_TYPE_NULL : BIND_PARAMETER_TYPE_INT64; + + char action[2] = { metadata->action, '\0' }; + char lsn[PG_LSN_MAXLENGTH] = { 0 }; + + sformat(lsn, sizeof(lsn), "%X/%X", LSN_FORMAT_ARGS(metadata->lsn)); + + /* bind our parameters now */ + BindParam params[] = { + { BIND_PARAMETER_TYPE_TEXT, "action", 0, action }, + { xidParamType, "xid", metadata->xid, NULL }, + { BIND_PARAMETER_TYPE_TEXT, "lsn", 0, lsn }, + { BIND_PARAMETER_TYPE_TEXT, "timestamp", 0, metadata->timestamp }, + { BIND_PARAMETER_TYPE_TEXT, "message", 0, metadata->jsonBuffer } + }; + + int count = sizeof(params) / sizeof(params[0]); + + if (!catalog_sql_bind(&query, params, count)) + { + /* errors have already been logged */ + return false; + } + + /* now execute the query, which does not return any row */ + if (!catalog_sql_execute_once(&query)) + { + /* errors have already been logged */ + return false; + } + + return true; +} + + +/* + * ld_store_insert_message inserts a logical decoding output plugin message + * into our SQLite file format. + */ +bool +ld_store_insert_internal_message(DatabaseCatalog *catalog, + InternalMessage *message) +{ + sqlite3 *db = catalog->db; + + if (db == NULL) + { + log_error("BUG: ld_store_insert_internal_message: db is NULL"); + return false; + } + + char *sql = + "insert or replace into output(action, lsn, timestamp)" + "values($1, $2, $3)"; + + SQLiteQuery query = { 0 }; + + if (!catalog_sql_prepare(db, sql, &query)) + { + /* errors have already been logged */ + return false; + } + + /* not all internal messages have a time entry */ + BindParameterType timeParamType = + message->time > 0 ? BIND_PARAMETER_TYPE_TEXT : BIND_PARAMETER_TYPE_NULL; + + if (message->time > 0) + { + /* add the server sendTime to the LogicalMessageMetadata */ + if (!pgsql_timestamptz_to_string(message->time, + message->timeStr, + sizeof(message->timeStr))) + { + log_error("Failed to format server send time %lld to time string", + (long long) message->time); + return false; + } + } + + char action[2] = { message->action, '\0' }; + char lsn[PG_LSN_MAXLENGTH] = { 0 }; + + sformat(lsn, sizeof(lsn), "%X/%X", LSN_FORMAT_ARGS(message->lsn)); + + /* bind our parameters now */ + BindParam params[] = { + { BIND_PARAMETER_TYPE_TEXT, "action", 0, action }, + { BIND_PARAMETER_TYPE_TEXT, "lsn", 0, lsn }, + { timeParamType, "timestamp", 0, message->timeStr } + }; + + int count = sizeof(params) / sizeof(params[0]); + + if (!catalog_sql_bind(&query, params, count)) + { + /* errors have already been logged */ + return false; + } + + /* now execute the query, which does not return any row */ + if (!catalog_sql_execute_once(&query)) + { + /* errors have already been logged */ + return false; + } + + return true; +} diff --git a/src/bin/pgcopydb/ld_stream.c b/src/bin/pgcopydb/ld_stream.c index 73a11ed29..6fc28b58f 100644 --- a/src/bin/pgcopydb/ld_stream.c +++ b/src/bin/pgcopydb/ld_stream.c @@ -33,9 +33,6 @@ #include "summary.h" -static bool updateStreamCounters(StreamContext *context, - LogicalMessageMetadata *metadata); - /* * stream_init_specs initializes Change Data Capture streaming specifications * from a copyDBSpecs structure. @@ -49,6 +46,7 @@ stream_init_specs(StreamSpecs *specs, uint64_t endpos, LogicalStreamMode mode, DatabaseCatalog *sourceDB, + DatabaseCatalog *replayDB, bool stdin, bool stdout, bool logSQL) @@ -66,6 +64,7 @@ stream_init_specs(StreamSpecs *specs, * Open the specified sourceDB catalog. 
*/ specs->sourceDB = sourceDB; + specs->replayDB = replayDB; if (!catalog_init(specs->sourceDB)) { @@ -358,6 +357,12 @@ stream_init_context(StreamSpecs *specs) privateContext->connStrings = specs->connStrings; + /* + * TODO: get rid of WalSegSz entirely. In the meantime, have it set to a + * fixed value as in the old Postgres versions. + */ + privateContext->WalSegSz = 16 * 1024 * 1024; + /* * When using PIPEs for inter-process communication, makes sure the PIPEs * are ready for us to use and not broken, as in EBADF. @@ -410,6 +415,66 @@ stream_init_context(StreamSpecs *specs) /* transform needs some catalog lookups (pkey, type oid) */ privateContext->sourceDB = specs->sourceDB; + /* replayDB is needed too */ + privateContext->replayDB = specs->replayDB; + + return true; +} + + +/* + * stream_init_timeline registers the timeline history information into our + * SQLite catalogs, and opens (and initializes if needed) the current replayDB + * SQLite file. + */ +bool +stream_init_timeline(StreamSpecs *specs, LogicalStreamClient *stream) +{ + DatabaseCatalog *sourceDB = specs->sourceDB; + StreamContext *privateContext = &(specs->private); + + privateContext->timeline = stream->system.timeline; + + /* insert the timeline history into our catalogs */ + if (stream->system.timelines.count == 0) + { + if (!ld_store_insert_timeline_history(sourceDB, + 1, + InvalidXLogRecPtr, + InvalidXLogRecPtr)) + { + log_error("Failed to initialize source database timeline history, " + "see above for details"); + return false; + } + } + else + { + for (int i = 0; i < stream->system.timelines.count; i++) + { + uint32_t tli = stream->system.timelines.history[i].tli; + uint64_t startpos = stream->system.timelines.history[i].begin; + uint64_t endpos = stream->system.timelines.history[i].end; + + if (!ld_store_insert_timeline_history(sourceDB, + tli, + startpos, + endpos)) + { + log_error("Failed to initialize source database timeline history, " + "see above for details"); + return false; + } + } + } + + /* now that we have the current timeline and startpos lsn */ + if (!ld_store_open_replaydb(specs)) + { + /* errors have already been logged */ + return false; + } + return true; } @@ -488,18 +553,13 @@ startLogicalStreaming(StreamSpecs *specs) return false; } - log_debug("startLogicalStreaming: %s (%d)", - OutputPluginToString(specs->slot.plugin), - specs->pluginOptions.count); - if (!pgsql_start_replication(&stream)) { /* errors have already been logged */ return false; } - /* write the wal_segment_size and timeline history files */ - if (!stream_write_context(specs, &stream)) + if (!stream_init_timeline(specs, &stream)) { /* errors have already been logged */ return false; @@ -537,11 +597,8 @@ startLogicalStreaming(StreamSpecs *specs) } else { - log_warn("Streaming got interrupted at %X/%X " - "after processing %lld message%s", - LSN_FORMAT_ARGS(context.tracking->written_lsn), - (long long) privateContext->counters.total, - privateContext->counters.total > 0 ? 
"s" : ""); + log_warn("Streaming got interrupted at %X/%X", + LSN_FORMAT_ARGS(context.tracking->written_lsn)); } /* if we are going to retry, we need to rollback the last txn */ @@ -568,19 +625,10 @@ startLogicalStreaming(StreamSpecs *specs) bool streamCheckResumePosition(StreamSpecs *specs) { - StreamContent latestStreamedContent = { 0 }; - - if (!stream_read_latest(specs, &latestStreamedContent)) - { - /* errors have already been logged */ - return false; - } - /* - * When we don't have any file on-disk yet, we might have specifications - * for when to start in the pgcopydb sentinel table. The sentinel only - * applies to STREAM_MODE_PREFETCH, in STREAM_MODE_RECEIVE we bypass that - * mechanism entirely. + * We might have specifications for when to start in the pgcopydb sentinel + * table. The sentinel only applies to STREAM_MODE_PREFETCH, in + * STREAM_MODE_RECEIVE we bypass that mechanism entirely. * * When STREAM_MODE_PREFETCH is set, it is expected that the pgcopydb * sentinel table has been setup before starting the logical decoding @@ -626,40 +674,12 @@ streamCheckResumePosition(StreamSpecs *specs) LSN_FORMAT_ARGS(specs->endpos)); } - if (latestStreamedContent.lbuf.count == 0) - { - if (specs->mode == STREAM_MODE_RECEIVE) - { - return true; - } - - if (sentinel.startpos != InvalidXLogRecPtr) - { - specs->startpos = sentinel.startpos; - - log_info("Resuming streaming at LSN %X/%X " - "from replication slot \"%s\"", - LSN_FORMAT_ARGS(specs->startpos), - specs->slot.slotName); - } - } - else + if (sentinel.startpos != InvalidXLogRecPtr) { - /* lines are counted starting at zero */ - int lastLineNb = latestStreamedContent.lbuf.count - 1; - - LogicalMessageMetadata *messages = latestStreamedContent.messages; - LogicalMessageMetadata *latest = &(messages[lastLineNb]); + specs->startpos = sentinel.startpos; - specs->startpos = latest->lsn; - - log_info("Resuming streaming at LSN %X/%X " - "from JSON file \"%s\" ", - LSN_FORMAT_ARGS(specs->startpos), - latestStreamedContent.filename); - - char *latestMessage = latestStreamedContent.lbuf.lines[lastLineNb]; - log_notice("Resume replication from latest message: %s", latestMessage); + log_info("Resuming streaming at LSN %X/%X (sentinel startpos)", + LSN_FORMAT_ARGS(specs->startpos)); } PGSQL src = { 0 }; @@ -679,6 +699,13 @@ streamCheckResumePosition(StreamSpecs *specs) return false; } + int logLevel = lsn == specs->startpos ? LOG_NOTICE : LOG_INFO; + + log_level(logLevel, + "Replication slot \"%s\" current lsn is %X/%X", + specs->slot.slotName, + LSN_FORMAT_ARGS(lsn)); + /* * The receive process knows how to skip over LSNs that have already been * fetched in a previous run. 
What we are not able to do is fill-in a gap @@ -686,7 +713,7 @@ streamCheckResumePosition(StreamSpecs *specs) */ if (specs->startpos < lsn) { - log_error("Failed to resume replication: on-disk next LSN is %X/%X " + log_error("Failed to resume replication: sentinel.startpos is %X/%X " "and replication slot LSN is %X/%X", LSN_FORMAT_ARGS(specs->startpos), LSN_FORMAT_ARGS(lsn)); @@ -710,6 +737,7 @@ streamWrite(LogicalStreamContext *context) { StreamContext *privateContext = (StreamContext *) context->private; LogicalMessageMetadata *metadata = &(privateContext->metadata); + DatabaseCatalog *replayDB = privateContext->replayDB; if (!prepareMessageMetadataFromContext(context)) { @@ -730,16 +758,38 @@ streamWrite(LogicalStreamContext *context) /* write the actual JSON message to file, unless instructed not to */ if (!metadata->skipping) { - bool previous = false; + if (context->onRetry) + { + /* + * When retrying due to a transient network error or server conn + * failure, we need to rollback the last incomplete transaction. + * + * Otherwise, we would end up with a partial transaction in the + * JSON file, and the transform process would fail to process it. + */ + if (privateContext->transactionInProgress) + { + InternalMessage rollback = { + .action = STREAM_ACTION_ROLLBACK, + .lsn = context->cur_record_lsn + }; - if (!stream_write_json(context, previous)) + if (!ld_store_insert_internal_message(replayDB, &rollback)) + { + /* errors have already been logged */ + return false; + } + } + + context->onRetry = false; + } + + /* insert the message to our current SQLite logical decoding file */ + if (!ld_store_insert_message(replayDB, metadata)) { /* errors have already been logged */ return false; } - - /* update internal transaction counters */ - (void) updateStreamCounters(privateContext, metadata); } if (metadata->xid > 0) @@ -756,145 +806,9 @@ streamWrite(LogicalStreamContext *context) LSN_FORMAT_ARGS(metadata->lsn)); } - return true; -} - - -/* - * stream_write_json writes the current (or previous) Logical Message to disk. - */ -bool -stream_write_json(LogicalStreamContext *context, bool previous) -{ - StreamContext *privateContext = (StreamContext *) context->private; - LogicalMessageMetadata *metadata = - previous ? &(privateContext->previous) : &(privateContext->metadata); - - /* we might have to rotate to the next on-disk file */ - if (!streamRotateFile(context)) - { - /* errors have already been logged */ - return false; - } - - /* - * Write the logical decoding message to disk, appending to the already - * opened file we track in the privateContext. - */ - if (privateContext->jsonFile == NULL) - { - log_error("Failed to write Logical Message: jsonFile is NULL"); - return false; - } - - if (context->onRetry) - { - /* - * When retrying due to a transient network error or server conn - * failure, we need to rollback the last incomplete transaction. - * - * Otherwise, we would end up with a partial transaction in the - * JSON file, and the transform process would fail to process it. 
- */ - if (privateContext->transactionInProgress) - { - InternalMessage rollback = { - .action = STREAM_ACTION_ROLLBACK, - .lsn = context->cur_record_lsn - }; - - if (!stream_write_internal_message(context, &rollback)) - { - /* errors have already been logged */ - return false; - } - } - - context->onRetry = false; - } - - /* prepare a in-memory buffer with the whole data formatted in JSON */ - PQExpBuffer buffer = createPQExpBuffer(); - - if (buffer == NULL) - { - log_fatal("Failed to allocate memory to prepare JSON message"); - return false; - } - - appendPQExpBuffer(buffer, - "{\"action\":\"%c\"," - "\"xid\":\"%lld\"," - "\"lsn\":\"%X/%X\"," - "\"timestamp\":\"%s\"," - "\"message\":", - metadata->action, - (long long) metadata->xid, - LSN_FORMAT_ARGS(metadata->lsn), - metadata->timestamp); - - appendPQExpBuffer(buffer, "%s}\n", metadata->jsonBuffer); - - /* memory allocation could have failed while building string */ - if (PQExpBufferBroken(buffer)) - { - log_error("Failed to prepare JSON message: out of memory"); - destroyPQExpBuffer(buffer); - return false; - } - - /* then add the logical output plugin data, inside our own JSON format */ - if (!write_to_stream(privateContext->jsonFile, buffer->data, buffer->len)) - { - log_error("Failed to write to file \"%s\": see above for details", - privateContext->partialFileName); - destroyPQExpBuffer(buffer); - return false; - } - - /* time to update our lastWriteTime mark */ - privateContext->lastWriteTime = time(NULL); - - /* update the tracking for maximum LSN of messages written to disk so far */ - if (privateContext->maxWrittenLSN < metadata->lsn) - { - privateContext->maxWrittenLSN = metadata->lsn; - } - - /* - * Now if specs->stdOut is true we want to also write all the same things - * again to stdout this time. We don't expect buffered IO to stdout, so we - * don't loop and retry short writes there. - */ - if (privateContext->stdOut) - { - if (!write_to_stream(privateContext->out, buffer->data, buffer->len)) - { - log_error("Failed to write JSON message to stdout: " - "see above for details"); - log_debug("JSON message: %s", buffer->data); - - destroyPQExpBuffer(buffer); - return false; - } - - /* flush stdout at transaction boundaries */ - if (metadata->action == STREAM_ACTION_COMMIT) - { - if (fflush(privateContext->out) != 0) - { - log_error("Failed to flush standard output: %m"); - destroyPQExpBuffer(buffer); - return false; - } - } - } - - destroyPQExpBuffer(buffer); - /* * Maintain the transaction progress based on the BEGIN and COMMIT messages - * received from replication slot. We don't care about the other messages. + * received from replication slot. */ if (metadata->action == STREAM_ACTION_BEGIN) { @@ -920,86 +834,6 @@ stream_write_json(LogicalStreamContext *context, bool previous) } -/* - * stream_write_internal_message outputs an internal message for pgcopydb - * operations into our current stream output(s). 
- */ -bool -stream_write_internal_message(LogicalStreamContext *context, - InternalMessage *message) -{ - StreamContext *privateContext = (StreamContext *) context->private; - - long buflen = 0; - char buffer[BUFSIZE] = { 0 }; - - /* not all internal message require a timestamp field */ - if (message->time > 0) - { - /* add the server sendTime to the LogicalMessageMetadata */ - if (!pgsql_timestamptz_to_string(message->time, - message->timeStr, - sizeof(message->timeStr))) - { - log_error("Failed to format server send time %lld to time string", - (long long) message->time); - return false; - } - - char *fmt = - "{\"action\":\"%c\",\"lsn\":\"%X/%X\",\"timestamp\":\"%s\"}\n"; - - buflen = sformat(buffer, sizeof(buffer), fmt, - message->action, - LSN_FORMAT_ARGS(message->lsn), - message->timeStr); - } - else - { - buflen = sformat(buffer, sizeof(buffer), - "{\"action\":\"%c\",\"lsn\":\"%X/%X\"}\n", - message->action, - LSN_FORMAT_ARGS(message->lsn)); - } - - if (!write_to_stream(privateContext->jsonFile, buffer, buflen)) - { - log_error("Failed to write internal message: %.1024s%s", - buffer, - buflen > 1024 ? "..." : ""); - return false; - } - - /* skip NOTICE logs for KEEPALIVE messages */ - if (message->action != STREAM_ACTION_KEEPALIVE) - { - log_notice("Inserted action %s for lsn %X/%X in \"%s\"", - StreamActionToString(message->action), - LSN_FORMAT_ARGS(message->lsn), - privateContext->partialFileName); - } - - /* - * When streaming to a Unix pipe don't forget to also stream the SWITCH - * WAL message there, so that the transform process forwards it. - */ - if (privateContext->stdOut) - { - if (!write_to_stream(privateContext->out, buffer, buflen)) - { - log_error("Failed to write JSON message (%ld bytes) to stdout: %m", - buflen); - log_debug("JSON message: %.1024s%s", - buffer, - buflen > 1024 ? "..." : ""); - return false; - } - } - - return true; -} - - /* * streamRotate decides if the received message should be appended to the * already opened file or to a new file, and then opens that file and takes @@ -1011,6 +845,7 @@ bool streamRotateFile(LogicalStreamContext *context) { StreamContext *privateContext = (StreamContext *) context->private; + DatabaseCatalog *replayDB = privateContext->replayDB; /* get the segment number from the current_record_lsn */ XLogSegNo segno; @@ -1099,8 +934,8 @@ streamRotateFile(LogicalStreamContext *context) } /* compute the WAL filename that would host the current message */ - XLByteToSeg(jsonFileLSN, segno, context->WalSegSz); - XLogFileName(wal, context->timeline, segno, context->WalSegSz); + XLByteToSeg(jsonFileLSN, segno, privateContext->WalSegSz); + XLogFileName(wal, context->timeline, segno, privateContext->WalSegSz); sformat(walFileName, sizeof(walFileName), "%s/%s.json", privateContext->paths.dir, @@ -1127,7 +962,7 @@ streamRotateFile(LogicalStreamContext *context) .lsn = jsonFileLSN }; - if (!stream_write_internal_message(context, &switchwal)) + if (!ld_store_insert_internal_message(replayDB, &switchwal)) { /* errors have already been logged */ return false; @@ -1201,19 +1036,6 @@ streamRotateFile(LogicalStreamContext *context) log_notice("Now streaming changes to \"%s\"", partialFileName); - /* - * Also maintain the "latest" symbolic link to the latest file where - * we've been streaming changes in. 
- */ - if (!stream_update_latest_symlink(privateContext, - privateContext->partialFileName)) - { - log_error("Failed to update latest symlink to \"%s\", " - "see above for details", - privateContext->partialFileName); - return false; - } - return true; } @@ -1227,6 +1049,7 @@ bool streamCloseFile(LogicalStreamContext *context, bool time_to_abort) { StreamContext *privateContext = (StreamContext *) context->private; + DatabaseCatalog *replayDB = privateContext->replayDB; /* * Before closing the JSON file, when we have reached endpos add a pgcopydb @@ -1247,7 +1070,7 @@ streamCloseFile(LogicalStreamContext *context, bool time_to_abort) .lsn = context->cur_record_lsn }; - if (!stream_write_internal_message(context, &endpos)) + if (!ld_store_insert_internal_message(replayDB, &endpos)) { /* errors have already been logged */ return false; @@ -1273,7 +1096,7 @@ streamCloseFile(LogicalStreamContext *context, bool time_to_abort) .lsn = context->cur_record_lsn }; - if (!stream_write_internal_message(context, &rollback)) + if (!ld_store_insert_internal_message(replayDB, &rollback)) { /* errors have already been logged */ return false; @@ -1316,16 +1139,6 @@ streamCloseFile(LogicalStreamContext *context, bool time_to_abort) return false; } - /* and also update the "latest" symlink, we need it for --resume */ - if (!stream_update_latest_symlink(privateContext, - privateContext->walFileName)) - { - log_error("Failed to update latest symlink to \"%s\", " - "see above for details", - privateContext->walFileName); - return false; - } - log_notice("Closed file \"%s\"", privateContext->walFileName); } @@ -1463,6 +1276,7 @@ bool streamKeepalive(LogicalStreamContext *context) { StreamContext *privateContext = (StreamContext *) context->private; + DatabaseCatalog *replayDB = privateContext->replayDB; /* skip LSN 0/0 at the start of streaming */ if (context->cur_record_lsn == InvalidXLogRecPtr) @@ -1486,7 +1300,7 @@ streamKeepalive(LogicalStreamContext *context) .time = context->sendTime }; - if (!stream_write_internal_message(context, &keepalive)) + if (!ld_store_insert_internal_message(replayDB, &keepalive)) { /* errors have already been logged */ return false; @@ -1721,11 +1535,10 @@ prepareMessageMetadataFromContext(LogicalStreamContext *context) previous->skipping = false; metadata->skipping = false; - bool previous = true; - - if (!stream_write_json(context, previous)) + /* insert the message to our current SQLite logical decoding file */ + if (!ld_store_insert_message(privateContext->replayDB, previous)) { - /* errors have been logged */ + /* errors have already been logged */ return false; } } @@ -1949,192 +1762,6 @@ parseMessageMetadata(LogicalMessageMetadata *metadata, } -/* - * stream_read_file reads a JSON file that is expected to contain messages - * received via logical decoding when using the wal2json output plugin with the - * format-version 2. - */ -bool -stream_read_file(StreamContent *content) -{ - long size = 0L; - char *contents = NULL; - - if (!read_file(content->filename, &contents, &size)) - { - /* errors have already been logged */ - return false; - } - - if (!splitLines(&(content->lbuf), contents)) - { - /* errors have already been logged */ - return false; - } - - /* - * If the file contains zero lines, we're done already, Also malloc(zero) - * leads to "corrupted size vs. prev_size" run-time errors. 
- */ - if (content->lbuf.count == 0) - { - return true; - } - - content->messages = - (LogicalMessageMetadata *) calloc(content->lbuf.count, - sizeof(LogicalMessageMetadata)); - - if (content->messages == NULL) - { - log_error(ALLOCATION_FAILED_ERROR); - return false; - } - - for (uint64_t i = 0; i < content->lbuf.count; i++) - { - char *message = content->lbuf.lines[i]; - LogicalMessageMetadata *metadata = &(content->messages[i]); - - JSON_Value *json = json_parse_string(message); - - if (!parseMessageMetadata(metadata, message, json, false)) - { - /* errors have already been logged */ - return false; - } - } - - return true; -} - - -/* - * stream_read_latest reads the "latest" file that was written into, if any, - * using the symbolic link named "latest". When the file exists, its content is - * parsed as an array of LogicalMessageMetadata. - * - * One message per physical line is expected (wal2json uses Postgres internal - * function escape_json which deals with escaping newlines and other special - * characters). - */ -bool -stream_read_latest(StreamSpecs *specs, StreamContent *content) -{ - char latest[MAXPGPATH] = { 0 }; - - sformat(latest, sizeof(latest), "%s/latest", specs->paths.dir); - - if (!file_exists(latest)) - { - return true; - } - - if (!normalize_filename(latest, - content->filename, - sizeof(content->filename))) - { - /* errors have already been logged */ - return false; - } - - log_info("Resuming streaming from latest file \"%s\"", content->filename); - - return stream_read_file(content); -} - - -/* - * stream_update_latest_symlink updates the latest symbolic link to the given - * filename, that must already exists on the file system. - */ -bool -stream_update_latest_symlink(StreamContext *privateContext, - const char *filename) -{ - char latest[MAXPGPATH] = { 0 }; - - sformat(latest, sizeof(latest), "%s/latest", privateContext->paths.dir); - - if (!unlink_file(latest)) - { - /* errors have already been logged */ - return false; - } - - if (!create_symbolic_link((char *) filename, latest)) - { - /* errors have already been logged */ - return false; - } - - log_debug("stream_update_latest_symlink: \"%s\" -> \"%s\"", - latest, - privateContext->partialFileName); - - return true; -} - - -/* - * updateStreamCounters increment the counter that matches the received - * message. - */ -static bool -updateStreamCounters(StreamContext *context, LogicalMessageMetadata *metadata) -{ - ++context->counters.total; - - switch (metadata->action) - { - case STREAM_ACTION_BEGIN: - { - ++context->counters.begin; - break; - } - - case STREAM_ACTION_COMMIT: - { - ++context->counters.commit; - break; - } - - case STREAM_ACTION_INSERT: - { - ++context->counters.insert; - break; - } - - case STREAM_ACTION_UPDATE: - { - ++context->counters.update; - break; - } - - case STREAM_ACTION_DELETE: - { - ++context->counters.delete; - break; - } - - case STREAM_ACTION_TRUNCATE: - { - ++context->counters.truncate; - break; - } - - default: - { - log_trace("Skipping counters for message action \"%c\"", - metadata->action); - break; - } - } - - return true; -} - - /* * buildReplicationURI builds a connection string that includes * replication=database from the connection string that's passed as input. @@ -2612,184 +2239,3 @@ stream_fetch_current_lsn(uint64_t *lsn, return true; } - - -/* - * stream_write_context writes the wal_segment_size and tli to files, as well as - * populate our internal catalogs with information in the timeline history file. 
- */ -bool -stream_write_context(StreamSpecs *specs, LogicalStreamClient *stream) -{ - IdentifySystem *system = &(stream->system); - char wal_segment_size[BUFSIZE] = { 0 }; - - /* also cache the system and WalSegSz in the StreamSpecs */ - specs->system = stream->system; - specs->WalSegSz = stream->WalSegSz; - - int bytes = - sformat(wal_segment_size, sizeof(wal_segment_size), "%lld", - (long long) stream->WalSegSz); - - if (!write_file(wal_segment_size, bytes, specs->paths.walsegsizefile)) - { - /* errors have already been logged */ - return false; - } - - log_debug("Wrote wal_segment_size %s into \"%s\"", - wal_segment_size, - specs->paths.walsegsizefile); - - char tli[BUFSIZE] = { 0 }; - - bytes = sformat(tli, sizeof(tli), "%d", system->timeline); - - if (!write_file(tli, bytes, specs->paths.tlifile)) - { - /* errors have already been logged */ - return false; - } - - log_debug("Wrote tli %s timeline file \"%s\"", tli, specs->paths.tlifile); - - /* read from the timeline history file and populate internal catalogs */ - if (stream->system.timeline > 1 && - !parse_timeline_history_file(stream->system.timelineHistoryFilename, - specs->sourceDB, - stream->system.timeline)) - { - /* errors have already been logged */ - return false; - } - - return true; -} - - -/* - * stream_cleanup_context removes the context files that are created upon - * connecting to the source database with the logical replication protocol. - */ -bool -stream_cleanup_context(StreamSpecs *specs) -{ - bool success = true; - - success = success && unlink_file(specs->paths.walsegsizefile); - success = success && unlink_file(specs->paths.tlifile); - - /* reset the timeline, so that we always read from the disk */ - specs->system.timeline = 0; - - return success; -} - - -/* - * stream_read_context reads the stream context back from files - * wal_segment_size and timeline history. - */ -bool -stream_read_context(StreamSpecs *specs) -{ - CDCPaths *paths = &(specs->paths); - IdentifySystem *system = &(specs->system); - uint32_t *WalSegSz = &(specs->WalSegSz); - - char *wal_segment_size = NULL; - char *tli = NULL; - long size = 0L; - - /* - * We need to read the 3 streaming context files that the receive process - * prepares when connecting to the source database. Because the catchup - * process might get here early, we implement a retry loop in case the - * files have not been created yet. - */ - ConnectionRetryPolicy retryPolicy = { 0 }; - - int maxT = 10; /* 10s */ - int maxSleepTime = 1500; /* 1.5s */ - int baseSleepTime = 100; /* 100ms */ - - (void) pgsql_set_retry_policy(&retryPolicy, - maxT, - -1, /* unbounded number of attempts */ - maxSleepTime, - baseSleepTime); - - while (!pgsql_retry_policy_expired(&retryPolicy)) - { - if (asked_to_stop || asked_to_stop_fast || asked_to_quit) - { - return false; - } - - if (file_exists(paths->walsegsizefile) && - file_exists(paths->tlifile)) - { - /* - * File did exist, but might have been deleted now (race condition - * at prefetch and transform processes start-up). 
- */ - bool success = true; - - success = success && - read_file(paths->walsegsizefile, &wal_segment_size, &size); - - success = success && - read_file(paths->tlifile, &tli, &size); - - if (success) - { - /* success: break out of the retry loop */ - break; - } - } - - int sleepTimeMs = - pgsql_compute_connection_retry_sleep_time(&retryPolicy); - - log_debug("stream_read_context: waiting for context files " - "to have been created, retrying in %dms", - sleepTimeMs); - - /* we have milliseconds, pg_usleep() wants microseconds */ - (void) pg_usleep(sleepTimeMs * 1000); - } - - /* did retry policy expire before the files are created? */ - if (!(file_exists(paths->walsegsizefile) && - file_exists(paths->tlifile))) - { - log_error("Failed to read stream context file: retry policy expired"); - return false; - } - - /* - * Now that we could read the file contents, parse it. - */ - if (!stringToUInt(wal_segment_size, WalSegSz)) - { - /* errors have already been logged */ - return false; - } - - if (!stringToUInt(tli, &(system->timeline))) - { - /* errors have already been logged */ - return false; - } - - DatabaseCatalog *source = specs->sourceDB; - if (!catalog_lookup_timeline_history(source, system->timeline, - &system->currentTimeline)) - { - /* errors have already been logged */ - return false; - } - - return true; -} diff --git a/src/bin/pgcopydb/ld_stream.h b/src/bin/pgcopydb/ld_stream.h index 9b9ea4965..a5a3a9266 100644 --- a/src/bin/pgcopydb/ld_stream.h +++ b/src/bin/pgcopydb/ld_stream.h @@ -56,17 +56,6 @@ typedef struct InternalMessage char timeStr[BUFSIZE]; } InternalMessage; -typedef struct StreamCounters -{ - uint64_t total; - uint64_t begin; - uint64_t commit; - uint64_t insert; - uint64_t update; - uint64_t delete; - uint64_t truncate; -} StreamCounters; - #define PG_MAX_TIMESTAMP 36 /* "2022-06-27 14:42:21.795714+00" */ @@ -86,7 +75,7 @@ typedef struct LogicalMessageMetadata bool filterOut; bool skipping; - /* the statement part of a PREPARE dseadbeef AS ... */ + /* the statement part of a PREPARE deadbeef AS ... 
*/ char *stmt; /* the raw message in our internal JSON format */ @@ -364,6 +353,7 @@ typedef struct StreamContext /* transform needs some catalog lookups (pkey, type oid) */ DatabaseCatalog *sourceDB; + DatabaseCatalog *replayDB; /* hash table acts as a cache for tables with generated columns */ GeneratedColumnsCache *generatedColumnsCache; @@ -381,8 +371,6 @@ typedef struct StreamContext FILE *jsonFile; FILE *sqlFile; - StreamCounters counters; - bool transactionInProgress; } StreamContext; @@ -428,6 +416,7 @@ typedef struct StreamApplyContext /* apply needs access to the catalogs to register sentinel replay_lsn */ DatabaseCatalog *sourceDB; + DatabaseCatalog *replayDB; uint64_t sentinelSyncTime; ConnStrings *connStrings; @@ -521,6 +510,7 @@ struct StreamSpecs /* transform needs some catalog lookups (pkey, type oid) */ DatabaseCatalog *sourceDB; + DatabaseCatalog *replayDB; /* receive push json filenames to a queue for transform */ Queue transformQueue; @@ -541,7 +531,7 @@ struct StreamSpecs FILE *out; }; - +/* ld_stream.c */ bool stream_init_specs(StreamSpecs *specs, CDCPaths *paths, ConnStrings *connStrings, @@ -550,6 +540,7 @@ bool stream_init_specs(StreamSpecs *specs, uint64_t endpos, LogicalStreamMode mode, DatabaseCatalog *sourceDB, + DatabaseCatalog *replayDB, bool stdIn, bool stdOut, bool logSQL); @@ -560,6 +551,7 @@ char * LogicalStreamModeToString(LogicalStreamMode mode); bool stream_check_in_out(StreamSpecs *specs); bool stream_init_context(StreamSpecs *specs); +bool stream_init_timeline(StreamSpecs *specs, LogicalStreamClient *stream); bool startLogicalStreaming(StreamSpecs *specs); bool streamCheckResumePosition(StreamSpecs *specs); @@ -587,16 +579,6 @@ bool parseMessageMetadata(LogicalMessageMetadata *metadata, bool LogicalMessageValueEq(LogicalMessageValue *a, LogicalMessageValue *b); -bool stream_write_json(LogicalStreamContext *context, bool previous); - -bool stream_write_internal_message(LogicalStreamContext *context, - InternalMessage *message); - -bool stream_read_file(StreamContent *content); -bool stream_read_latest(StreamSpecs *specs, StreamContent *content); -bool stream_update_latest_symlink(StreamContext *privateContext, - const char *filename); - bool stream_sync_sentinel(LogicalStreamContext *context); bool buildReplicationURI(const char *pguri, char **repl_pguri); @@ -618,25 +600,43 @@ bool stream_fetch_current_lsn(uint64_t *lsn, const char *pguri, ConnectionType connectionType); -bool stream_write_context(StreamSpecs *specs, LogicalStreamClient *stream); -bool stream_cleanup_context(StreamSpecs *specs); -bool stream_read_context(StreamSpecs *specs); - StreamAction StreamActionFromChar(char action); char * StreamActionToString(StreamAction action); +/* ld_store.c */ +bool ld_store_open_replaydb(StreamSpecs *specs); +bool ld_store_current_filename(StreamSpecs *specs); +bool ld_store_current_filename_fetch(SQLiteQuery *query); + +bool ld_store_insert_cdc_filename(StreamSpecs *specs); + +bool ld_store_insert_timeline_history(DatabaseCatalog *catalog, + uint32_t tli, + uint64_t startpos, + uint64_t endpos); + +bool ld_store_insert_message(DatabaseCatalog *catalog, + LogicalMessageMetadata *metadata); + +bool ld_store_insert_internal_message(DatabaseCatalog *catalog, + InternalMessage *message); + + /* ld_transform.c */ bool stream_transform_worker(StreamSpecs *specs); bool stream_transform_from_queue(StreamSpecs *specs); + bool stream_transform_add_file(Queue *queue, uint64_t firstLSN); + +bool stream_transform_add_action(Queue *queue, + StreamAction action, + 
uint64_t lsn); + bool stream_transform_send_stop(Queue *queue); -bool stream_compute_pathnames(uint32_t WalSegSz, - uint32_t timeline, - uint64_t lsn, - char *dir, - char *walFileName, - char *sqlFileName); +bool stream_transform_action_at_lsn(StreamSpecs *specs, + StreamAction action, + uint64_t lsn); bool stream_transform_context_init(StreamSpecs *specs); bool stream_transform_stream(StreamSpecs *specs); @@ -657,6 +657,13 @@ bool stream_transform_file(StreamSpecs *specs, bool stream_transform_file_at_lsn(StreamSpecs *specs, uint64_t lsn); +bool stream_compute_pathnames(uint32_t WalSegSz, + uint32_t timeline, + uint64_t lsn, + char *dir, + char *walFileName, + char *sqlFileName); + bool stream_write_message(FILE *out, LogicalMessage *msg); bool stream_write_transaction(FILE *out, LogicalTransaction *tx); @@ -724,6 +731,7 @@ bool stream_apply_sql(StreamApplyContext *context, bool stream_apply_init_context(StreamApplyContext *context, DatabaseCatalog *sourceDB, + DatabaseCatalog *replayDB, CDCPaths *paths, ConnStrings *connStrings, char *origin, diff --git a/src/bin/pgcopydb/ld_transform.c b/src/bin/pgcopydb/ld_transform.c index 1dd0dd896..de317cabb 100644 --- a/src/bin/pgcopydb/ld_transform.c +++ b/src/bin/pgcopydb/ld_transform.c @@ -280,24 +280,12 @@ stream_transform_resume(StreamSpecs *specs) return false; } - /* we need timeline and wal_segment_size to compute WAL filenames */ - if (specs->system.timeline == 0) + if (!ld_store_open_replaydb(specs)) { - if (!stream_read_context(specs)) - { - log_error("Failed to read the streaming context information " - "from the source database and internal catalogs, " - "see above for details"); - return false; - } + /* errors have already been logged */ + return false; } - privateContext->WalSegSz = specs->WalSegSz; - privateContext->timeline = specs->system.timeline; - - log_debug("Source database wal_segment_size is %u", specs->WalSegSz); - log_debug("Source database timeline is %d", specs->system.timeline); - char jsonFileName[MAXPGPATH] = { 0 }; char sqlFileName[MAXPGPATH] = { 0 }; @@ -664,23 +652,10 @@ stream_transform_rotate(StreamContext *privateContext) bool stream_transform_worker(StreamSpecs *specs) { - /* - * The timeline and wal segment size are determined when connecting to the - * source database, and stored to local files at that time. When the Stream - * Transform Worker process is created, that information is read from our - * local files and internal catalogs. 
- */ - if (!stream_read_context(specs)) + /* at startup, open the current replaydb file */ + if (!ld_store_open_replaydb(specs)) { - if (asked_to_stop || asked_to_stop_fast || asked_to_quit) - { - log_debug("Stream Transform Worker startup was interrupted"); - return true; - } - - log_error("Failed to read the streaming context information " - "from the source database and internal catalogs, " - "see above for details"); + /* errors have already been logged */ return false; } diff --git a/src/bin/pgcopydb/pgsql.c b/src/bin/pgcopydb/pgsql.c index 865804e25..72d3a5acb 100644 --- a/src/bin/pgcopydb/pgsql.c +++ b/src/bin/pgcopydb/pgsql.c @@ -3992,13 +3992,6 @@ pgsql_start_replication(LogicalStreamClient *client) return false; } - /* determine remote server's xlog segment size */ - if (!RetrieveWalSegSize(client)) - { - destroyPQExpBuffer(query); - return false; - } - log_sql("%s", query->data); PGresult *res = PQexec(pgsql->connection, query->data); @@ -4045,7 +4038,6 @@ pgsql_stream_logical(LogicalStreamClient *client, LogicalStreamContext *context) context->plugin = client->plugin; context->timeline = client->system.timeline; - context->WalSegSz = client->WalSegSz; context->tracking = &(client->current); client->now = feGetCurrentTimestamp(); @@ -4650,90 +4642,6 @@ prepareToTerminate(LogicalStreamClient *client, bool keepalive, XLogRecPtr lsn) } -/* - * From version 10, explicitly set wal segment size using SHOW wal_segment_size - * since ControlFile is not accessible here. - */ -bool -RetrieveWalSegSize(LogicalStreamClient *client) -{ - PGconn *conn = client->pgsql.connection; - - PGresult *res; - char xlog_unit[3]; - int xlog_val, - multiplier = 1; - - /* check connection existence */ - if (conn == NULL) - { - log_error("BUG: RetrieveWalSegSize called with a NULL client connection"); - return false; - } - - /* for previous versions set the default xlog seg size */ - if (PQserverVersion(conn) < MINIMUM_VERSION_FOR_SHOW_CMD) - { - client->WalSegSz = DEFAULT_XLOG_SEG_SIZE; - return true; - } - - res = PQexec(conn, "SHOW wal_segment_size"); - if (PQresultStatus(res) != PGRES_TUPLES_OK) - { - log_error("could not send replication command \"%s\": %s", - "SHOW wal_segment_size", PQerrorMessage(conn)); - - PQclear(res); - return false; - } - if (PQntuples(res) != 1 || PQnfields(res) < 1) - { - log_error("could not fetch WAL segment size: got %d rows and %d fields, " - "expected %d rows and %d or more fields", - PQntuples(res), PQnfields(res), 1, 1); - - PQclear(res); - return false; - } - - /* fetch xlog value and unit from the result */ - if (sscanf(PQgetvalue(res, 0, 0), "%d%2s", &xlog_val, xlog_unit) != 2) /* IGNORE-BANNED */ - { - log_error("WAL segment size could not be parsed"); - PQclear(res); - return false; - } - - PQclear(res); - - /* set the multiplier based on unit to convert xlog_val to bytes */ - if (strcmp(xlog_unit, "MB") == 0) - { - multiplier = 1024 * 1024; - } - else if (strcmp(xlog_unit, "GB") == 0) - { - multiplier = 1024 * 1024 * 1024; - } - - /* convert and set WalSegSz */ - client->WalSegSz = xlog_val * multiplier; - - if (!IsValidWalSegSize(client->WalSegSz)) - { - log_error("WAL segment size must be a power of two between 1 MB and 1 GB, " - "but the remote server reported a value of %d bytes", - client->WalSegSz); - return false; - } - - log_sql("RetrieveWalSegSize: %d", client->WalSegSz); - - return true; -} - - /* * Get block size from the connected Postgres instance. 
*/ diff --git a/src/bin/pgcopydb/pgsql.h b/src/bin/pgcopydb/pgsql.h index 6e14b9f2a..0ec680c95 100644 --- a/src/bin/pgcopydb/pgsql.h +++ b/src/bin/pgcopydb/pgsql.h @@ -437,7 +437,6 @@ typedef struct LogicalStreamContext XLogRecPtr cur_record_lsn; int timeline; - uint32_t WalSegSz; const char *buffer; /* expose internal buffer */ StreamOutputPlugin plugin; @@ -474,8 +473,6 @@ typedef struct LogicalStreamClient StreamOutputPlugin plugin; KeyVal pluginOptions; - uint32_t WalSegSz; - XLogRecPtr startpos; XLogRecPtr endpos; @@ -525,11 +522,6 @@ bool pgsql_start_replication(LogicalStreamClient *client); bool pgsql_stream_logical(LogicalStreamClient *client, LogicalStreamContext *context); -/* SHOW command for replication connection was introduced in version 10 */ -#define MINIMUM_VERSION_FOR_SHOW_CMD 100000 - -bool RetrieveWalSegSize(LogicalStreamClient *client); - bool pgsql_get_block_size(PGSQL *pgsql, int *blockSize); bool pgsql_replication_origin_oid(PGSQL *pgsql, char *nodeName, uint32_t *oid); diff --git a/src/bin/pgcopydb/schema.h b/src/bin/pgcopydb/schema.h index 9c37e88bd..fde116f1a 100644 --- a/src/bin/pgcopydb/schema.h +++ b/src/bin/pgcopydb/schema.h @@ -324,7 +324,8 @@ typedef enum DATABASE_CATALOG_TYPE_UNKNOWN = 0, DATABASE_CATALOG_TYPE_SOURCE, DATABASE_CATALOG_TYPE_FILTER, - DATABASE_CATALOG_TYPE_TARGET + DATABASE_CATALOG_TYPE_TARGET, + DATABASE_CATALOG_TYPE_REPLAY } DatabaseCatalogType; @@ -395,6 +396,7 @@ typedef struct Catalogs DatabaseCatalog source; DatabaseCatalog filter; DatabaseCatalog target; + DatabaseCatalog replay; } Catalogs; diff --git a/src/bin/pgcopydb/sentinel.c b/src/bin/pgcopydb/sentinel.c index 51323bf10..6dd92be39 100644 --- a/src/bin/pgcopydb/sentinel.c +++ b/src/bin/pgcopydb/sentinel.c @@ -32,8 +32,9 @@ sentinel_setup(DatabaseCatalog *catalog, uint64_t startpos, uint64_t endpos) char *sql = "insert or replace into sentinel(" - " id, startpos, endpos, apply, write_lsn, flush_lsn, replay_lsn) " - "values($1, $2, $3, $4, '0/0', '0/0', '0/0')"; + " id, startpos, endpos, apply, " + " write_lsn, transform_lsn, flush_lsn, replay_lsn) " + "values($1, $2, $3, $4, '0/0', '0/0', '0/0', '0/0')"; if (!semaphore_lock(&(catalog->sema))) { @@ -345,6 +346,70 @@ sentinel_update_write_flush_lsn(DatabaseCatalog *catalog, } +/* + * sentinel_update_transform_lsn updates our pgcopydb sentinel table end pos. 
+ */ +bool +sentinel_update_transform_lsn(DatabaseCatalog *catalog, uint64_t transform_lsn) +{ + sqlite3 *db = catalog->db; + + if (db == NULL) + { + log_error("BUG: sentinel_update_transform_lsn: db is NULL"); + return false; + } + + char *sql = "update sentinel set transform_lsn = $1 where id = 1"; + + if (!semaphore_lock(&(catalog->sema))) + { + /* errors have already been logged */ + return false; + } + + SQLiteQuery query = { .errorOnZeroRows = true }; + + if (!catalog_sql_prepare(db, sql, &query)) + { + /* errors have already been logged */ + (void) semaphore_unlock(&(catalog->sema)); + return false; + } + + char transformLSN[PG_LSN_MAXLENGTH] = { 0 }; + + sformat(transformLSN, sizeof(transformLSN), "%X/%X", + LSN_FORMAT_ARGS(transform_lsn)); + + /* bind our parameters now */ + BindParam params[] = { + { BIND_PARAMETER_TYPE_TEXT, "transform_lsn", 0, (char *) transformLSN } + }; + + int count = sizeof(params) / sizeof(params[0]); + + if (!catalog_sql_bind(&query, params, count)) + { + /* errors have already been logged */ + (void) semaphore_unlock(&(catalog->sema)); + return false; + } + + /* now execute the query, which does not return any row */ + if (!catalog_sql_execute_once(&query)) + { + /* errors have already been logged */ + (void) semaphore_unlock(&(catalog->sema)); + return false; + } + + (void) semaphore_unlock(&(catalog->sema)); + + return true; +} + + /* * sentinel_update_replay_lsn updates our pgcopydb sentinel table end pos. */ @@ -422,7 +487,8 @@ sentinel_get(DatabaseCatalog *catalog, CopyDBSentinel *sentinel) } char *sql = - "select startpos, endpos, apply, write_lsn, flush_lsn, replay_lsn " + "select startpos, endpos, apply, " + " write_lsn, transform_lsn, flush_lsn, replay_lsn " " from sentinel " " where id = 1"; @@ -508,9 +574,9 @@ sentinel_fetch(SQLiteQuery *query) { char *lsn = (char *) sqlite3_column_text(query->ppStmt, 4); - if (!parseLSN(lsn, &(sentinel->flush_lsn))) + if (!parseLSN(lsn, &(sentinel->transform_lsn))) { - log_error("Failed to parse sentinel flush_lsn LSN \"%s\"", lsn); + log_error("Failed to parse sentinel transform_lsn LSN \"%s\"", lsn); return false; } } @@ -519,6 +585,17 @@ sentinel_fetch(SQLiteQuery *query) { char *lsn = (char *) sqlite3_column_text(query->ppStmt, 5); + if (!parseLSN(lsn, &(sentinel->flush_lsn))) + { + log_error("Failed to parse sentinel flush_lsn LSN \"%s\"", lsn); + return false; + } + } + + if (sqlite3_column_type(query->ppStmt, 6) != SQLITE_NULL) + { + char *lsn = (char *) sqlite3_column_text(query->ppStmt, 6); + if (!parseLSN(lsn, &(sentinel->replay_lsn))) { log_error("Failed to parse sentinel replay_lsn LSN \"%s\"", lsn); @@ -560,6 +637,34 @@ sentinel_sync_recv(DatabaseCatalog *catalog, } +/* + * sentinel_sync_transform updates the current sentinel values for write_lsn and + * flush_lsn, and fetches the current value for replay_lsn, endpos, and apply. + */ +bool +sentinel_sync_transform(DatabaseCatalog *catalog, + uint64_t transform_lsn, + CopyDBSentinel *sentinel) +{ + if (!sentinel_update_transform_lsn(catalog, transform_lsn)) + { + /* errors have already been logged */ + return false; + } + + if (!sentinel_get(catalog, sentinel)) + { + /* errors have already been logged */ + return false; + } + + log_debug("sentinel_sync_transform: transform_lsn %X/%X", + LSN_FORMAT_ARGS(sentinel->transform_lsn)); + + return true; +} + + /* * sentinel_sync_apply updates the current sentinel values for replay_lsn, and * fetches the current value for endpos and apply. 
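Note on the transform_lsn tracking added above: the sentinel code formats LSN values as "%X/%X" text before binding them as SQLite parameters, and parses them back with parseLSN when fetching the row. The standalone sketch below illustrates that round-trip against the raw sqlite3 API; it is an illustration only, not pgcopydb code: the trimmed-down one-column sentinel table, the database file name, and the hard-coded sample LSN are assumptions made for the example.

/*
 * Illustration only -- not part of the patch.  A minimal sketch of the
 * transform_lsn round-trip: format an LSN as "%X/%X" text, store it in a
 * SQLite table, then read it back and parse it again.  The trimmed-down
 * sentinel table, the file name, and the sample LSN are assumptions made
 * for this example.
 */
#include <stdio.h>
#include <stdint.h>
#include <inttypes.h>
#include <sqlite3.h>

int
main(void)
{
    sqlite3 *db = NULL;

    if (sqlite3_open("/tmp/sentinel-example.db", &db) != SQLITE_OK)
    {
        fprintf(stderr, "failed to open database: %s\n", sqlite3_errmsg(db));
        return 1;
    }

    /* a trimmed-down sentinel table with only the new column */
    sqlite3_exec(db,
                 "create table if not exists sentinel"
                 "(id integer primary key, transform_lsn text);"
                 "insert or replace into sentinel(id, transform_lsn) "
                 "values(1, '0/0');",
                 NULL, NULL, NULL);

    /* format an LSN the same way the patch does: "%X/%X" */
    uint64_t lsn = ((uint64_t) 0x2 << 32) | 0x4E9F2D88;
    char lsnstr[32] = { 0 };

    snprintf(lsnstr, sizeof(lsnstr), "%X/%X",
             (unsigned int) (lsn >> 32), (unsigned int) lsn);

    /* bind the text value to the $1 parameter, as the sentinel queries do */
    sqlite3_stmt *stmt = NULL;

    sqlite3_prepare_v2(db,
                       "update sentinel set transform_lsn = $1 where id = 1",
                       -1, &stmt, NULL);
    sqlite3_bind_text(stmt, 1, lsnstr, -1, SQLITE_TRANSIENT);
    sqlite3_step(stmt);
    sqlite3_finalize(stmt);

    /* read the value back and parse it, as sentinel_fetch does with parseLSN */
    sqlite3_prepare_v2(db,
                       "select transform_lsn from sentinel where id = 1",
                       -1, &stmt, NULL);

    if (sqlite3_step(stmt) == SQLITE_ROW)
    {
        const char *text = (const char *) sqlite3_column_text(stmt, 0);
        unsigned int hi = 0;
        unsigned int lo = 0;

        if (sscanf(text, "%X/%X", &hi, &lo) == 2)
        {
            uint64_t parsed = ((uint64_t) hi << 32) | lo;

            printf("transform_lsn = %s (%" PRIu64 ")\n", text, parsed);
        }
    }

    sqlite3_finalize(stmt);
    sqlite3_close(db);

    return 0;
}

Building the sketch only needs the SQLite development library (for instance: cc sentinel-example.c -lsqlite3). Keeping the LSN in its usual "%X/%X" textual form means the values stored in the catalog stay directly readable from the sqlite3 shell and match the way Postgres prints pg_lsn values.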
diff --git a/src/bin/pgcopydb/snapshot.c b/src/bin/pgcopydb/snapshot.c index c5d329536..314bd54cb 100644 --- a/src/bin/pgcopydb/snapshot.c +++ b/src/bin/pgcopydb/snapshot.c @@ -7,6 +7,7 @@ #include #include "copydb.h" +#include "ld_stream.h" #include "log.h" /* @@ -410,6 +411,7 @@ copydb_create_logical_replication_slot(CopyDataSpec *copySpecs, sourceSnapshot->kind = SNAPSHOT_KIND_LOGICAL; + StreamSpecs specs = { 0 }; LogicalStreamClient *stream = &(sourceSnapshot->stream); if (!pgsql_init_stream(stream, @@ -423,6 +425,14 @@ copydb_create_logical_replication_slot(CopyDataSpec *copySpecs, return false; } + /* fetch the source timeline */ + if (!pgsql_identify_system(&(stream->pgsql), &(stream->system))) + { + /* errors have already been logged */ + return false; + } + + /* now create the replication slot, exporting the snapshot */ if (!pgsql_create_logical_replication_slot(stream, slot)) { log_error("Failed to create a logical replication slot " @@ -456,6 +466,18 @@ copydb_create_logical_replication_slot(CopyDataSpec *copySpecs, return false; } + /* initialize catalog timeline history and create a replayDB SQLite file */ + specs.paths = copySpecs->cfPaths.cdc; + specs.sourceDB = &(copySpecs->catalogs.source); + specs.replayDB = &(copySpecs->catalogs.replay); + specs.private.startpos = slot->lsn; + + if (!stream_init_timeline(&specs, stream)) + { + /* errors have already been logged */ + return false; + } + return true; } From 199ebff6a607ba9f501296d2d8d01c5c88b7e7a5 Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Thu, 12 Sep 2024 18:25:18 +0200 Subject: [PATCH 4/8] Update the transform code to use the SQLite replayDB. --- src/bin/pgcopydb/catalog.c | 4 +- src/bin/pgcopydb/copydb.h | 3 +- src/bin/pgcopydb/follow.c | 11 +- src/bin/pgcopydb/ld_apply.c | 1 + src/bin/pgcopydb/ld_store.c | 603 ++++++++++++++++++++- src/bin/pgcopydb/ld_store.h | 97 ++++ src/bin/pgcopydb/ld_stream.c | 45 +- src/bin/pgcopydb/ld_stream.h | 43 +- src/bin/pgcopydb/ld_transform.c | 846 ++++++++++++++---------------- src/bin/pgcopydb/pgsql.c | 10 + src/bin/pgcopydb/pgsql.h | 1 + src/bin/pgcopydb/pgsql_timeline.c | 9 +- src/bin/pgcopydb/snapshot.c | 12 +- tests/cdc-low-level/copydb.sh | 16 +- 14 files changed, 1144 insertions(+), 557 deletions(-) create mode 100644 src/bin/pgcopydb/ld_store.h diff --git a/src/bin/pgcopydb/catalog.c b/src/bin/pgcopydb/catalog.c index b1be98e3b..685786e3c 100644 --- a/src/bin/pgcopydb/catalog.c +++ b/src/bin/pgcopydb/catalog.c @@ -8100,7 +8100,9 @@ catalog_sql_bind(SQLiteQuery *query, BindParam *params, int count) { if (!catalog_bind_parameters(query->db, query->ppStmt, params, count)) { - /* errors have already been logged */ + log_error("[SQLite] Failed to bind parameters in query: %s", + query->sql); + (void) sqlite3_clear_bindings(query->ppStmt); (void) sqlite3_finalize(query->ppStmt); return false; diff --git a/src/bin/pgcopydb/copydb.h b/src/bin/pgcopydb/copydb.h index b14e7afbd..42bbc9dbd 100644 --- a/src/bin/pgcopydb/copydb.h +++ b/src/bin/pgcopydb/copydb.h @@ -332,7 +332,8 @@ bool copydb_close_snapshot(CopyDataSpec *copySpecs); bool copydb_create_logical_replication_slot(CopyDataSpec *copySpecs, const char *logrep_pguri, - ReplicationSlot *slot); + ReplicationSlot *slot, + char *cdcPathDir); bool snapshot_write_slot(const char *filename, ReplicationSlot *slot); bool snapshot_read_slot(const char *filename, ReplicationSlot *slot); diff --git a/src/bin/pgcopydb/follow.c b/src/bin/pgcopydb/follow.c index 4a8f2a801..001555055 100644 --- a/src/bin/pgcopydb/follow.c +++ 
b/src/bin/pgcopydb/follow.c @@ -34,7 +34,8 @@ follow_export_snapshot(CopyDataSpec *copySpecs, StreamSpecs *streamSpecs) if (!copydb_create_logical_replication_slot(copySpecs, logrep_pguri, - &(streamSpecs->slot))) + &(streamSpecs->slot), + streamSpecs->paths.dir)) { /* errors have already been logged */ return false; @@ -681,6 +682,14 @@ follow_start_prefetch(StreamSpecs *specs) bool follow_start_transform(StreamSpecs *specs) { + if (!stream_transform_messages(specs)) + { + log_error("Transform process failed, see above for details"); + return false; + } + + return true; + /* * In replay mode, the JSON messages are read from stdin, which we * now setup to be a pipe between prefetch and transform processes; diff --git a/src/bin/pgcopydb/ld_apply.c b/src/bin/pgcopydb/ld_apply.c index 88ff4f4c1..ef63c7e12 100644 --- a/src/bin/pgcopydb/ld_apply.c +++ b/src/bin/pgcopydb/ld_apply.c @@ -20,6 +20,7 @@ #include "cli_root.h" #include "copydb.h" #include "env_utils.h" +#include "ld_store.h" #include "ld_stream.h" #include "lock_utils.h" #include "log.h" diff --git a/src/bin/pgcopydb/ld_store.c b/src/bin/pgcopydb/ld_store.c index 5621f53de..e48b31e86 100644 --- a/src/bin/pgcopydb/ld_store.c +++ b/src/bin/pgcopydb/ld_store.c @@ -12,6 +12,7 @@ #include "parson.h" #include "copydb.h" +#include "ld_store.h" #include "ld_stream.h" #include "lock_utils.h" #include "log.h" @@ -32,7 +33,7 @@ ld_store_open_replaydb(StreamSpecs *specs) StreamContext *privateContext = &(specs->private); DatabaseCatalog *replayDB = specs->replayDB; - if (!ld_store_current_filename(specs)) + if (!ld_store_set_current_cdc_filename(specs)) { /* errors have already been logged */ return false; @@ -84,11 +85,11 @@ ld_store_open_replaydb(StreamSpecs *specs) /* - * ld_store_current_filename queries the sourceDB SQLite catalog for an open - * file for the current timeline. + * ld_store_set_current_filename queries the sourceDB SQLite catalog for an + * open file for the current timeline. */ bool -ld_store_current_filename(StreamSpecs *specs) +ld_store_set_current_cdc_filename(StreamSpecs *specs) { StreamContext *privateContext = &(specs->private); @@ -96,7 +97,7 @@ ld_store_current_filename(StreamSpecs *specs) if (db == NULL) { - log_error("BUG: ld_store_current_filename: db is NULL"); + log_error("BUG: ld_store_set_current_cdc_filename: db is NULL"); return false; } @@ -112,7 +113,7 @@ ld_store_current_filename(StreamSpecs *specs) SQLiteQuery query = { .context = specs->replayDB->dbfile, - .fetchFunction = &ld_store_current_filename_fetch + .fetchFunction = &ld_store_cdc_filename_fetch }; if (!catalog_sql_prepare(db, sql, &query)) @@ -158,10 +159,168 @@ ld_store_current_filename(StreamSpecs *specs) /* - * ld_store_current_filename_fetch is a SQLiteQuery callback. + * ld_store_set_cdc_filename_at_lsn finds the CDC file that contains the given + * LSN. + * + * Because of the Postgres Logical Decoding default transaction ordering, where + * a transaction is streamed after COMMIT, we could have interleaving + * transactions, meaning several file candidates for the same LSN. + * + * In that case open the files until we find the one that actually contains the + * given LSN. + * + * When the given LSN is "0/0", which is InvalidXLogRecPtr, then open the first + * file that we have. 
*/ bool -ld_store_current_filename_fetch(SQLiteQuery *query) +ld_store_set_cdc_filename_at_lsn(StreamSpecs *specs, uint64_t lsn) +{ + if (lsn == InvalidXLogRecPtr) + { + return ld_store_set_first_cdc_filename(specs); + } + + sqlite3 *db = specs->sourceDB->db; + + if (db == NULL) + { + log_error("BUG: ld_store_set_cdc_filename_at_lsn: db is NULL"); + return false; + } + + char *sql = + " select filename " + " from cdc_files " + " where startpos >= $1 and endpos <= $2 " + "order by id, filename "; + + char candidate[MAXPGPATH] = { 0 }; + + SQLiteQuery query = { + .context = candidate, + .fetchFunction = &ld_store_cdc_filename_fetch + }; + + if (!catalog_sql_prepare(db, sql, &query)) + { + /* errors have already been logged */ + return false; + } + + char pg_lsn[PG_LSN_MAXLENGTH] = { 0 }; + sformat(pg_lsn, sizeof(pg_lsn), "%X/%X", LSN_FORMAT_ARGS(lsn)); + + /* bind our parameters now */ + BindParam params[] = { + { BIND_PARAMETER_TYPE_TEXT, "lsn", 0, (char *) pg_lsn }, + { BIND_PARAMETER_TYPE_TEXT, "lsn", 0, (char *) pg_lsn } + }; + + int count = sizeof(params) / sizeof(params[0]); + + if (!catalog_sql_bind(&query, params, count)) + { + /* errors have already been logged */ + return false; + } + + int rc; + + while ((rc = catalog_sql_step(&query)) != SQLITE_DONE) + { + if (rc != SQLITE_ROW) + { + log_error("Failed to fetch cdc_file for LSN %X/%X, " + "see above for details", + LSN_FORMAT_ARGS(lsn)); + return false; + } + + if (!ld_store_cdc_filename_fetch(&query)) + { + /* errors have already been logged */ + return false; + } + + DatabaseCatalog *candidateDB = + (DatabaseCatalog *) calloc(1, sizeof(DatabaseCatalog)); + + strlcpy(candidateDB->dbfile, candidate, sizeof(candidateDB->dbfile)); + + if (!catalog_open(candidateDB)) + { + /* errors have already been logged */ + return false; + } + + /* now check if the candidateDB contains the given LSN */ + if (ld_store_lookup_lsn(candidateDB, lsn)) + { + specs->replayDB = candidateDB; + break; + } + + if (!catalog_close(candidateDB)) + { + /* errors have already been logged */ + return false; + } + } + + if (!catalog_sql_finalize(&query)) + { + /* errors have already been logged */ + return false; + } + + return true; +} + + +/* + * ld_store_set_cdc_filename_at_lsn sets our replayDB dbfile value to the first + * CDC file that we have in the catalogs. + */ +bool +ld_store_set_first_cdc_filename(StreamSpecs *specs) +{ + sqlite3 *db = specs->sourceDB->db; + + if (db == NULL) + { + log_error("BUG: ld_store_first_cdc_filename: db is NULL"); + return false; + } + + char *sql = "select filename from cdc_files order by id limit 1"; + + SQLiteQuery query = { + .context = specs->replayDB->dbfile, + .fetchFunction = &ld_store_cdc_filename_fetch + }; + + if (!catalog_sql_prepare(db, sql, &query)) + { + /* errors have already been logged */ + return false; + } + + /* now execute the query, which does not return any row */ + if (!catalog_sql_execute_once(&query)) + { + /* errors have already been logged */ + return false; + } + + return true; +} + + +/* + * ld_store_cdc_filename_fetch is a SQLiteQuery callback. + */ +bool +ld_store_cdc_filename_fetch(SQLiteQuery *query) { char *filename = (char *) query->context; @@ -174,8 +333,57 @@ ld_store_current_filename_fetch(SQLiteQuery *query) strlcpy(filename, (char *) sqlite3_column_text(query->ppStmt, 0), MAXPGPATH); + } + + return true; +} + - log_info("ld_store_current_filename_fetch: \"%s\"", filename); +/* + * ld_store_lookup_lsn searches the given LSN in the given replayDB database. 
+ */ +bool +ld_store_lookup_lsn(DatabaseCatalog *catalog, uint64_t lsn) +{ + sqlite3 *db = catalog->db; + + if (db == NULL) + { + log_error("BUG: ld_store_lookup_lsn: db is NULL"); + return false; + } + + char *sql = "select 1 from output where lsn = $1 limit 1"; + + SQLiteQuery query = { + .errorOnZeroRows = true + }; + + char pg_lsn[PG_LSN_MAXLENGTH] = { 0 }; + sformat(pg_lsn, sizeof(pg_lsn), "%X/%X", LSN_FORMAT_ARGS(lsn)); + + if (!catalog_sql_prepare(db, sql, &query)) + { + /* errors have already been logged */ + return false; + } + + /* bind our parameters now */ + BindParam params[1] = { + { BIND_PARAMETER_TYPE_TEXT, "lsn", 0, (char *) pg_lsn }, + }; + + if (!catalog_sql_bind(&query, params, 1)) + { + /* errors have already been logged */ + return false; + } + + /* now execute the query, which return exactly one row */ + if (!catalog_sql_execute_once(&query)) + { + /* errors have already been logged */ + return false; } return true; @@ -444,3 +652,380 @@ ld_store_insert_internal_message(DatabaseCatalog *catalog, return true; } + + +/* + * ld_store_insert_replay_stmt inserts a replay statement in the stmt and + * replay tables of the replayDB. + */ +bool +ld_store_insert_replay_stmt(DatabaseCatalog *catalog, + ReplayDBStmt *replayStmt) +{ + sqlite3 *db = catalog->db; + + if (db == NULL) + { + log_error("BUG: ld_store_insert_internal_message: db is NULL"); + return false; + } + + /* compute the hash as a string, needed in both stmt and replay tables */ + char hash[9] = { 0 }; + sformat(hash, sizeof(hash), "%x", replayStmt->hash); + + if (replayStmt->stmt != NULL) + { + char *sql = "insert into stmt (hash, sql) values($1, $2)"; + + SQLiteQuery query = { 0 }; + + if (!catalog_sql_prepare(db, sql, &query)) + { + /* errors have already been logged */ + return false; + } + + /* bind our parameters now */ + BindParam params[] = { + { BIND_PARAMETER_TYPE_TEXT, "hash", 0, hash }, + { BIND_PARAMETER_TYPE_TEXT, "sql", 0, replayStmt->stmt } + }; + + int count = sizeof(params) / sizeof(params[0]); + + if (!catalog_sql_bind(&query, params, count)) + { + /* errors have already been logged */ + return false; + } + + /* now execute the query, which does not return any row */ + if (!catalog_sql_execute_once(&query)) + { + /* errors have already been logged */ + return false; + } + } + + char *sql = + "insert into replay(action, xid, lsn, timestamp, stmt_hash, stmt_args)" + "values($1, $2, $3, $4, $5, $6)"; + + SQLiteQuery query = { 0 }; + + if (!catalog_sql_prepare(db, sql, &query)) + { + /* errors have already been logged */ + return false; + } + + char action[2] = { replayStmt->action, '\0' }; + + /* not all messages have an xid */ + BindParameterType xidParamType = replayStmt->xid > 0 ? + BIND_PARAMETER_TYPE_INT64 : BIND_PARAMETER_TYPE_NULL; + + /* not all messages have an lsn */ + char lsn[PG_LSN_MAXLENGTH] = { 0 }; + BindParameterType lsnParamType = BIND_PARAMETER_TYPE_NULL; + + if (replayStmt->lsn != InvalidXLogRecPtr) + { + lsnParamType = BIND_PARAMETER_TYPE_TEXT; + sformat(lsn, sizeof(lsn), "%X/%X", LSN_FORMAT_ARGS(replayStmt->lsn)); + } + + /* not all messages have a time entry */ + char *timestr = NULL; + BindParameterType timeParamType = BIND_PARAMETER_TYPE_NULL; + + if (!IS_EMPTY_STRING_BUFFER(replayStmt->timestamp)) + { + timeParamType = BIND_PARAMETER_TYPE_TEXT; + timestr = replayStmt->timestamp; + } + + /* not all messages have a statement (hash, data) */ + BindParameterType hashParamType = replayStmt->hash > 0 ? 
+ BIND_PARAMETER_TYPE_TEXT : BIND_PARAMETER_TYPE_NULL; + + BindParameterType dataParamType = replayStmt->data != NULL ? + BIND_PARAMETER_TYPE_TEXT : BIND_PARAMETER_TYPE_NULL; + + /* bind our parameters now */ + BindParam params[] = { + { BIND_PARAMETER_TYPE_TEXT, "action", 0, action }, + { xidParamType, "xid", replayStmt->xid, NULL }, + { lsnParamType, "lsn", 0, lsn }, + { timeParamType, "timestamp", 0, timestr }, + { hashParamType, "stmt_hash", 0, hash }, + { dataParamType, "stmt_args", 0, replayStmt->data }, + }; + + int count = sizeof(params) / sizeof(params[0]); + + if (!catalog_sql_bind(&query, params, count)) + { + /* errors have already been logged */ + return false; + } + + /* now execute the query, which does not return any row */ + if (!catalog_sql_execute_once(&query)) + { + /* errors have already been logged */ + return false; + } + + return true; +} + + +/* + * ld_store_iter_output iterates over the output table of the replayDB. + */ +bool +ld_store_iter_output(StreamSpecs *specs, ReplayDBOutputIterFun *callback) +{ + ReplayDBOutputIterator *iter = + (ReplayDBOutputIterator *) calloc(1, sizeof(ReplayDBOutputIterator)); + + iter->catalog = specs->replayDB; + iter->transform_lsn = specs->sentinel.transform_lsn; + iter->endpos = specs->endpos; + + if (!ld_store_iter_output_init(iter)) + { + /* errors have already been logged */ + return false; + } + + for (;;) + { + if (!ld_store_iter_output_next(iter)) + { + /* errors have already been logged */ + return false; + } + + ReplayDBOutputMessage *output = iter->output; + + if (output == NULL) + { + if (!ld_store_iter_output_finish(iter)) + { + /* errors have already been logged */ + return false; + } + + break; + } + + bool stop = false; + + /* now call the provided callback */ + if (!(*callback)(specs, output, &stop)) + { + log_error("Failed to iterate over CDC output messages, " + "see above for details"); + return false; + } + + if (stop) + { + if (!ld_store_iter_output_finish(iter)) + { + /* errors have already been logged */ + return false; + } + return true; + } + } + + return true; +} + + +/* + * ld_store_iter_output_init initializes an Iterator over our SQLite replayDB + * output messages. 
+ */ +bool +ld_store_iter_output_init(ReplayDBOutputIterator *iter) +{ + sqlite3 *db = iter->catalog->db; + + if (db == NULL) + { + log_error("BUG: Failed to initialize output iterator: db is NULL"); + return false; + } + + iter->output = + (ReplayDBOutputMessage *) calloc(1, sizeof(ReplayDBOutputMessage)); + + if (iter->output == NULL) + { + log_error(ALLOCATION_FAILED_ERROR); + return false; + } + + char *sql = + " select id, action, xid, lsn, timestamp, message " + " from output " + " where lsn > $1 and lsn <= $2 " + " order by id"; + + SQLiteQuery *query = &(iter->query); + + query->context = iter->output; + query->fetchFunction = &ld_store_output_fetch; + + if (!catalog_sql_prepare(db, sql, query)) + { + /* errors have already been logged */ + return false; + } + + char tlsn[PG_LSN_MAXLENGTH] = { 0 }; + char elsn[PG_LSN_MAXLENGTH] = { 0 }; + + sformat(tlsn, sizeof(tlsn), "%X/%X", LSN_FORMAT_ARGS(iter->transform_lsn)); + sformat(elsn, sizeof(elsn), "%X/%X", LSN_FORMAT_ARGS(iter->endpos)); + + /* bind our parameters now */ + BindParam params[] = { + { BIND_PARAMETER_TYPE_TEXT, "transform_lsn", 0, tlsn }, + { BIND_PARAMETER_TYPE_TEXT, "endpos", 0, elsn } + }; + + int count = sizeof(params) / sizeof(params[0]); + + if (!catalog_sql_bind(query, params, count)) + { + /* errors have already been logged */ + return false; + } + + return true; +} + + +/* + * ld_store_iter_output_next fetches the next ReplayDBOutputMessage + */ +bool +ld_store_iter_output_next(ReplayDBOutputIterator *iter) +{ + SQLiteQuery *query = &(iter->query); + + int rc = catalog_sql_step(query); + + if (rc == SQLITE_DONE) + { + iter->output = NULL; + + return true; + } + + if (rc != SQLITE_ROW) + { + log_error("Failed to step through statement: %s", query->sql); + log_error("[SQLite] %s", sqlite3_errmsg(query->db)); + return false; + } + + return ld_store_output_fetch(query); +} + + +/* + * ld_store_output_fetch fetches a ReplayDBOutputMessage entry from a SQLite + * ppStmt result set. 
+ */ +bool +ld_store_output_fetch(SQLiteQuery *query) +{ + ReplayDBOutputMessage *output = (ReplayDBOutputMessage *) query->context; + + /* cleanup the memory area before re-use */ + bzero(output, sizeof(ReplayDBOutputMessage)); + + output->id = sqlite3_column_int64(query->ppStmt, 0); + + if (sqlite3_column_type(query->ppStmt, 1) != SQLITE_NULL) + { + char *action = (char *) sqlite3_column_text(query->ppStmt, 1); + output->action = action[0]; + } + else + { + log_error("Failed to fetch action for output id %lld", + (long long) output->id); + return false; + } + + output->xid = sqlite3_column_int64(query->ppStmt, 2); + + /* lsn could be NULL */ + output->lsn = InvalidXLogRecPtr; + + if (sqlite3_column_type(query->ppStmt, 3) != SQLITE_NULL) + { + char *lsn = (char *) sqlite3_column_text(query->ppStmt, 3); + + if (!parseLSN(lsn, &(output->lsn))) + { + log_error("Failed to parse LSN \"%s\"", lsn); + return false; + } + } + + /* timestamp */ + if (sqlite3_column_type(query->ppStmt, 4) != SQLITE_NULL) + { + strlcpy(output->timestamp, + (char *) sqlite3_column_text(query->ppStmt, 4), + sizeof(output->timestamp)); + } + + /* message */ + if (sqlite3_column_type(query->ppStmt, 5) != SQLITE_NULL) + { + int len = sqlite3_column_bytes(query->ppStmt, 5); + int bytes = len + 1; + + output->jsonBuffer = (char *) calloc(bytes, sizeof(char)); + + if (output->jsonBuffer == NULL) + { + log_fatal(ALLOCATION_FAILED_ERROR); + return false; + } + + strlcpy(output->jsonBuffer, + (char *) sqlite3_column_text(query->ppStmt, 5), + bytes); + } + + return true; +} + + +/* + * ld_store_iter_output_finish cleans-up the SQLite query used in the iterator. + */ +bool +ld_store_iter_output_finish(ReplayDBOutputIterator *iter) +{ + SQLiteQuery *query = &(iter->query); + + if (!catalog_sql_finalize(query)) + { + /* errors have already been logged */ + return false; + } + + return true; +} diff --git a/src/bin/pgcopydb/ld_store.h b/src/bin/pgcopydb/ld_store.h new file mode 100644 index 000000000..9490b0976 --- /dev/null +++ b/src/bin/pgcopydb/ld_store.h @@ -0,0 +1,97 @@ +/* + * src/bin/pgcopydb/ld_store.h + * CDC implementation for Postgres Logical Decoding in pgcopydb. + */ + +#ifndef LD_STORE_H +#define LD_STORE_H + +#include + +#include "ld_stream.h" + + +typedef struct ReplayDBOutputMessage +{ + uint64_t id; + StreamAction action; + uint32_t xid; + uint64_t lsn; + + char timestamp[PG_MAX_TIMESTAMP]; + + char *jsonBuffer; /* malloc'ed area */ + + PQExpBuffer stmt; + PQExpBuffer data; +} ReplayDBOutputMessage; + + +/* + * This structure reprensents both the stmt and replay tables of the replayDB + * schema, as these are meant to be used together, via a JOIN on the statement + * hash. 
+ */ +typedef struct ReplayDBStmt +{ + uint64_t id; + StreamAction action; + uint32_t xid; + uint64_t lsn; + + char timestamp[PG_MAX_TIMESTAMP]; + + uint32_t hash; + char *stmt; /* malloc'ed area */ + char *data; /* malloc'ed area */ +} ReplayDBStmt; + + +bool ld_store_open_replaydb(StreamSpecs *specs); + +bool ld_store_set_current_cdc_filename(StreamSpecs *specs); +bool ld_store_set_first_cdc_filename(StreamSpecs *specs); +bool ld_store_set_cdc_filename_at_lsn(StreamSpecs *specs, uint64_t lsn); + +bool ld_store_cdc_filename_fetch(SQLiteQuery *query); + +bool ld_store_lookup_lsn(DatabaseCatalog *catalog, uint64_t lsn); + +bool ld_store_insert_cdc_filename(StreamSpecs *specs); + +bool ld_store_insert_timeline_history(DatabaseCatalog *catalog, + uint32_t tli, + uint64_t startpos, + uint64_t endpos); + +bool ld_store_insert_message(DatabaseCatalog *catalog, + LogicalMessageMetadata *metadata); + +bool ld_store_insert_internal_message(DatabaseCatalog *catalog, + InternalMessage *message); + +bool ld_store_insert_replay_stmt(DatabaseCatalog *catalog, + ReplayDBStmt *replayStmt); + +typedef bool (ReplayDBOutputIterFun)(StreamSpecs *specs, + ReplayDBOutputMessage *output, + bool *stop); + +bool ld_store_iter_output(StreamSpecs *specs, ReplayDBOutputIterFun *callback); + +typedef struct ReplayDBOutputIterator +{ + DatabaseCatalog *catalog; + ReplayDBOutputMessage *output; + SQLiteQuery query; + + uint64_t transform_lsn; + uint64_t endpos; +} ReplayDBOutputIterator; + +bool ld_store_iter_output_init(ReplayDBOutputIterator *iter); +bool ld_store_iter_output_next(ReplayDBOutputIterator *iter); +bool ld_store_output_fetch(SQLiteQuery *query); +bool ld_store_iter_output_finish(ReplayDBOutputIterator *iter); + +#endif /* LD_STORE_H */ diff --git a/src/bin/pgcopydb/ld_stream.c b/src/bin/pgcopydb/ld_stream.c index 6fc28b58f..069de160f 100644 --- a/src/bin/pgcopydb/ld_stream.c +++ b/src/bin/pgcopydb/ld_stream.c @@ -20,6 +20,7 @@ #include "cli_root.h" #include "copydb.h" #include "env_utils.h" +#include "ld_store.h" #include "ld_stream.h" #include "lock_utils.h" #include "log.h" @@ -431,42 +432,20 @@ bool stream_init_timeline(StreamSpecs *specs, LogicalStreamClient *stream) { DatabaseCatalog *sourceDB = specs->sourceDB; - StreamContext *privateContext = &(specs->private); - - privateContext->timeline = stream->system.timeline; - /* insert the timeline history into our catalogs */ - if (stream->system.timelines.count == 0) + if (!parse_timeline_history_file(stream->system.timelineHistoryFilename, + sourceDB, + stream->system.timeline)) { - if (!ld_store_insert_timeline_history(sourceDB, - 1, - InvalidXLogRecPtr, - InvalidXLogRecPtr)) - { - log_error("Failed to initialize source database timeline history, " - "see above for details"); - return false; - } + log_error("Failed to parse timeline history file \"%s\": " + "see above for details", + specs->system.timelineHistoryFilename); + return false; } - else - { - for (int i = 0; i < stream->system.timelines.count; i++) - { - uint32_t tli = stream->system.timelines.history[i].tli; - uint64_t startpos = stream->system.timelines.history[i].begin; - uint64_t endpos = stream->system.timelines.history[i].end; - if (!ld_store_insert_timeline_history(sourceDB, - tli, - startpos, - endpos)) - { - log_error("Failed to initialize source database timeline history, " - "see above for details"); - return false; - } - } - } + /* publish the stream client Identify System information in the specs */ + specs->system = stream->system; + specs->private.timeline = 
stream->system.timeline; /* now that we have the current timeline and startpos lsn */ if (!ld_store_open_replaydb(specs)) @@ -495,7 +474,6 @@ startLogicalStreaming(StreamSpecs *specs) stream.closeFunction = &streamClose; stream.feedbackFunction = &streamFeedback; stream.keepaliveFunction = &streamKeepalive; - strlcpy(stream.cdcPathDir, specs->paths.dir, MAXPGPATH); /* * Read possibly already existing file to initialize the start LSN from a @@ -544,6 +522,7 @@ startLogicalStreaming(StreamSpecs *specs) if (!pgsql_init_stream(&stream, specs->connStrings->logrep_pguri, + specs->paths.dir, specs->slot.plugin, specs->slot.slotName, specs->startpos, diff --git a/src/bin/pgcopydb/ld_stream.h b/src/bin/pgcopydb/ld_stream.h index a5a3a9266..4e6584d9e 100644 --- a/src/bin/pgcopydb/ld_stream.h +++ b/src/bin/pgcopydb/ld_stream.h @@ -603,26 +603,18 @@ bool stream_fetch_current_lsn(uint64_t *lsn, StreamAction StreamActionFromChar(char action); char * StreamActionToString(StreamAction action); -/* ld_store.c */ -bool ld_store_open_replaydb(StreamSpecs *specs); -bool ld_store_current_filename(StreamSpecs *specs); -bool ld_store_current_filename_fetch(SQLiteQuery *query); -bool ld_store_insert_cdc_filename(StreamSpecs *specs); - -bool ld_store_insert_timeline_history(DatabaseCatalog *catalog, - uint32_t tli, - uint64_t startpos, - uint64_t endpos); - -bool ld_store_insert_message(DatabaseCatalog *catalog, - LogicalMessageMetadata *metadata); +/* ld_transform.c */ +bool stream_transform_messages(StreamSpecs *specs); +bool stream_transform_cdc_file(StreamSpecs *specs); -bool ld_store_insert_internal_message(DatabaseCatalog *catalog, - InternalMessage *message); +bool stream_transform_write_transaction(StreamSpecs *specs); +bool stream_transform_write_replay_stmt(StreamSpecs *specs); +bool stream_transform_write_replay_txn(StreamSpecs *specs); +bool stream_transform_write_message(StreamContext *privateContext, + uint64_t *currentMsgIndex); -/* ld_transform.c */ bool stream_transform_worker(StreamSpecs *specs); bool stream_transform_from_queue(StreamSpecs *specs); @@ -643,8 +635,6 @@ bool stream_transform_stream(StreamSpecs *specs); bool stream_transform_resume(StreamSpecs *specs); bool stream_transform_line(void *ctx, const char *line, bool *stop); -bool stream_transform_write_message(StreamContext *privateContext, - uint64_t *currentMsgIndex); bool stream_transform_message(StreamContext *privateContext, char *message); @@ -664,23 +654,6 @@ bool stream_compute_pathnames(uint32_t WalSegSz, char *walFileName, char *sqlFileName); -bool stream_write_message(FILE *out, LogicalMessage *msg); -bool stream_write_transaction(FILE *out, LogicalTransaction *tx); - -bool stream_write_switchwal(FILE *out, LogicalMessageSwitchWAL *switchwal); -bool stream_write_keepalive(FILE *out, LogicalMessageKeepalive *keepalive); -bool stream_write_endpos(FILE *out, LogicalMessageEndpos *endpos); - -bool stream_write_begin(FILE *out, LogicalTransaction *tx); -bool stream_write_commit(FILE *out, LogicalTransaction *tx); -bool stream_write_rollback(FILE *out, LogicalTransaction *tx); - -bool stream_write_insert(FILE *out, LogicalMessageInsert *insert); -bool stream_write_truncate(FILE *out, LogicalMessageTruncate *truncate); -bool stream_write_update(FILE *out, LogicalMessageUpdate *update); -bool stream_write_delete(FILE * out, LogicalMessageDelete *delete); -bool stream_write_sql_escape_string_constant(FILE *out, const char *str); - bool stream_add_value_in_json_array(LogicalMessageValue *value, JSON_Array *jsArray); diff --git 
a/src/bin/pgcopydb/ld_transform.c b/src/bin/pgcopydb/ld_transform.c index de317cabb..99f0dbc51 100644 --- a/src/bin/pgcopydb/ld_transform.c +++ b/src/bin/pgcopydb/ld_transform.c @@ -26,6 +26,7 @@ #include "cli_root.h" #include "copydb.h" #include "env_utils.h" +#include "ld_store.h" #include "ld_stream.h" #include "lock_utils.h" #include "log.h" @@ -69,6 +70,84 @@ static GeneratedColumnSet * lookupGeneratedColumnsForTable(GeneratedColumnsCache const char *nspname, const char *relname); +static bool stream_transform_cdc_file_hook(StreamSpecs *specs, + ReplayDBOutputMessage *output, + bool *stop); + + +static bool stream_write_insert(ReplayDBStmt *replayStmt, + LogicalMessageInsert *insert); + +static bool stream_write_update(ReplayDBStmt *replayStmt, + LogicalMessageUpdate *update); + +static bool stream_write_delete(ReplayDBStmt *replayStmt, + LogicalMessageDelete *delete); + +static bool stream_write_truncate(ReplayDBStmt *replayStmt, + LogicalMessageTruncate *truncate); + + +/* + * stream_transform_messages loops over the CDC files and transform messages in + * there. + */ +bool +stream_transform_messages(StreamSpecs *specs) +{ + StreamContext *privateContext = &(specs->private); + + if (!stream_transform_context_init(specs)) + { + /* errors have already been logged */ + return false; + } + + if (!stream_transform_resume(specs)) + { + log_error("Failed to resume streaming from %X/%X", + LSN_FORMAT_ARGS(privateContext->startpos)); + return false; + } + + while (privateContext->endpos == InvalidXLogRecPtr || + specs->sentinel.transform_lsn <= privateContext->endpos) + { + if (asked_to_stop || asked_to_stop_fast || asked_to_quit) + { + log_debug("stream_transform_messages was asked to stop"); + return true; + } + + if (!ld_store_set_cdc_filename_at_lsn(specs, + specs->sentinel.transform_lsn)) + { + log_error("Failed to find CDC file at lsn %X/%X, " + "see above for details", + LSN_FORMAT_ARGS(specs->sentinel.transform_lsn)); + return false; + } + + if (!stream_transform_cdc_file(specs)) + { + log_error("Failed to transform CDC messages from file \"%s\", " + "see above for details", + specs->replayDB->dbfile); + return false; + } + + /* allow some time for the files and content to be created */ + pg_usleep(150 * 100); + } + + log_info("Transform reached end position %X/%X at %X/%X", + LSN_FORMAT_ARGS(privateContext->endpos), + LSN_FORMAT_ARGS(specs->sentinel.transform_lsn)); + + return true; +} + + /* * stream_transform_context_init initializes StreamContext for the transform * operation. @@ -109,6 +188,159 @@ stream_transform_context_init(StreamSpecs *specs) } +/* + * stream_transform_cdc_file loops through a SQLite CDC file and transform + * messages found in the file. + */ +bool +stream_transform_cdc_file(StreamSpecs *specs) +{ + log_notice("Transforming Logical Decoding messages from file \"%s\"", + specs->replayDB->dbfile); + + if (!ld_store_iter_output(specs, &stream_transform_cdc_file_hook)) + { + log_error("Failed to iterate over CDC file \"%s\", " + "see above for details", + specs->replayDB->dbfile); + return false; + } + + return true; +} + + +/* + * stream_transform_cdc_file_hook is an iterator callback function. 
+ */ +static bool +stream_transform_cdc_file_hook(StreamSpecs *specs, + ReplayDBOutputMessage *output, + bool *stop) +{ + CopyDBSentinel *sentinel = &(specs->sentinel); + StreamContext *privateContext = &(specs->private); + LogicalMessageMetadata *metadata = &(privateContext->metadata); + + /* parse the logical decoding output */ + if (!stream_transform_message(privateContext, output->jsonBuffer)) + { + /* errors have already been logged */ + return false; + } + + /* insert the message into the SQLite replayDB (stmt, replay) */ + if (!stream_transform_write_transaction(specs)) + { + /* errors have already been logged */ + return false; + } + + /* + * At COMMIT, ROLLBACK, and KEEPALIVE, sync the sentinel transform_lsn. + * At SWITCH, also sync transform_lsn so that we move on to the next file. + */ + if (metadata->action == STREAM_ACTION_COMMIT || + metadata->action == STREAM_ACTION_ROLLBACK || + metadata->action == STREAM_ACTION_SWITCH || + metadata->action == STREAM_ACTION_KEEPALIVE) + { + if (!sentinel_sync_transform(specs->sourceDB, metadata->lsn, sentinel)) + { + /* errors have already been logged */ + return false; + } + + /* SWITCH is expected to be the last entry in the file */ + if (metadata->action == STREAM_ACTION_SWITCH) + { + *stop = true; + } + } + /* at ENDPOS check that it's the current sentinel value and exit */ + else if (metadata->action == STREAM_ACTION_ENDPOS) + { + if (!sentinel_sync_transform(specs->sourceDB, metadata->lsn, sentinel)) + { + /* errors have already been logged */ + return false; + } + + if (sentinel->endpos != InvalidXLogRecPtr && + sentinel->endpos <= metadata->lsn) + { + *stop = true; + + log_info("Transform process reached ENDPOS %X/%X", + LSN_FORMAT_ARGS(metadata->lsn)); + + return true; + } + } + + if (privateContext->endpos != InvalidXLogRecPtr && + privateContext->endpos <= metadata->lsn) + { + *stop = true; + + log_info("Transform reached end position %X/%X at %X/%X", + LSN_FORMAT_ARGS(privateContext->endpos), + LSN_FORMAT_ARGS(metadata->lsn)); + } + + return true; +} + + +/* + * stream_transform_write_transaction updates the SQLite replayDB with the stmt + * and replay messages as processed from the logical decoding plugin output. + */ +bool +stream_transform_write_transaction(StreamSpecs *specs) +{ + StreamContext *privateContext = &(specs->private); + + LogicalMessage *currentMsg = &(privateContext->currentMsg); + LogicalMessageMetadata *metadata = &(privateContext->metadata); + + switch (metadata->action) + { + case STREAM_ACTION_COMMIT: + case STREAM_ACTION_ROLLBACK: + case STREAM_ACTION_KEEPALIVE: + case STREAM_ACTION_SWITCH: + case STREAM_ACTION_ENDPOS: + { + /* insert the transaction into the SQLite replayDB (stmt, replay) */ + if (!stream_transform_write_replay_stmt(specs)) + { + /* errors have already been logged */ + return false; + } + + /* then prepare a new transaction, reusing the same memory area */ + LogicalMessage empty = { 0 }; + *currentMsg = empty; + + return true; + } + + /* + * Skip DML messages, we update one full transaction at a time to allow + * for INSERT rewrites with multiple-values and other SQL level + * optimisations. 
+ */ + default: + { + return true; + } + } + + return true; +} + + /* * stream_transform_stream transforms a JSON formatted input stream (read line * by line) as received from the wal2json logical decoding plugin into an SQL @@ -216,8 +448,6 @@ stream_transform_stream_internal(StreamSpecs *specs) bool stream_transform_resume(StreamSpecs *specs) { - StreamContext *privateContext = &(specs->private); - /* * Now grab the current sentinel values, specifically the current endpos. * @@ -225,9 +455,9 @@ stream_transform_resume(StreamSpecs *specs) * command line option (found in specs->endpos) prevails, but when it's not * been used, we have a look at the sentinel value. */ - CopyDBSentinel sentinel = { 0 }; + CopyDBSentinel *sentinel = &(specs->sentinel); - if (!sentinel_get(specs->sourceDB, &sentinel)) + if (!sentinel_get(specs->sourceDB, sentinel)) { /* errors have already been logged */ return false; @@ -235,21 +465,23 @@ stream_transform_resume(StreamSpecs *specs) log_notice("stream_transform_resume: " "startpos %X/%X endpos %X/%X " - "write_lsn %X/%X flush_lsn %X/%X replay_lsn %X/%X", - LSN_FORMAT_ARGS(sentinel.startpos), - LSN_FORMAT_ARGS(sentinel.endpos), - LSN_FORMAT_ARGS(sentinel.write_lsn), - LSN_FORMAT_ARGS(sentinel.flush_lsn), - LSN_FORMAT_ARGS(sentinel.replay_lsn)); + "write_lsn %X/%X transform_lsn %X/%X flush_lsn %X/%X " + "replay_lsn %X/%X", + LSN_FORMAT_ARGS(sentinel->startpos), + LSN_FORMAT_ARGS(sentinel->endpos), + LSN_FORMAT_ARGS(sentinel->write_lsn), + LSN_FORMAT_ARGS(sentinel->transform_lsn), + LSN_FORMAT_ARGS(sentinel->flush_lsn), + LSN_FORMAT_ARGS(sentinel->replay_lsn)); if (specs->endpos == InvalidXLogRecPtr) { - specs->endpos = sentinel.endpos; + specs->endpos = sentinel->endpos; } - else if (specs->endpos != sentinel.endpos) + else if (specs->endpos != sentinel->endpos) { log_warn("Sentinel endpos is %X/%X, overriden by --endpos %X/%X", - LSN_FORMAT_ARGS(sentinel.endpos), + LSN_FORMAT_ARGS(sentinel->endpos), LSN_FORMAT_ARGS(specs->endpos)); } @@ -262,9 +494,9 @@ stream_transform_resume(StreamSpecs *specs) /* if we have a startpos, that's better than using 0/0 at init time */ if (specs->startpos == InvalidXLogRecPtr) { - if (sentinel.startpos != InvalidXLogRecPtr) + if (sentinel->startpos != InvalidXLogRecPtr) { - specs->startpos = sentinel.startpos; + specs->startpos = sentinel->startpos; log_notice("Resuming transform at LSN %X/%X from sentinel", LSN_FORMAT_ARGS(specs->startpos)); @@ -280,47 +512,6 @@ stream_transform_resume(StreamSpecs *specs) return false; } - if (!ld_store_open_replaydb(specs)) - { - /* errors have already been logged */ - return false; - } - - char jsonFileName[MAXPGPATH] = { 0 }; - char sqlFileName[MAXPGPATH] = { 0 }; - - if (!stream_compute_pathnames(privateContext->WalSegSz, - privateContext->timeline, - privateContext->startpos, - privateContext->paths.dir, - jsonFileName, - sqlFileName)) - { - /* errors have already been logged */ - return false; - } - - log_notice("Transforming from %X/%X in \"%s\"", - LSN_FORMAT_ARGS(privateContext->startpos), - sqlFileName); - - /* - * If the JSON file already exists on-disk, make sure to read it file again - * now. The previous round of streaming might have stopped at an endpos - * that fits in the middle of a transaction. - * - * We can think about this as "cache invalidation" of the SQL file on-disk. 
- */ - if (file_exists(jsonFileName)) - { - if (!stream_transform_file(specs, jsonFileName, sqlFileName)) - { - log_error("Failed to resume transforming from existing file \"%s\"", - sqlFileName); - return false; - } - } - return true; } @@ -469,21 +660,21 @@ stream_transform_write_message(StreamContext *privateContext, } /* now write the transaction out */ - if (privateContext->out != NULL) - { - if (!stream_write_message(privateContext->out, currentMsg)) - { - /* errors have already been logged */ - return false; - } - } - - /* now write the transaction out also to file on-disk */ - if (!stream_write_message(privateContext->sqlFile, currentMsg)) - { - /* errors have already been logged */ - return false; - } + /* if (privateContext->out != NULL) */ + /* { */ + /* if (!stream_write_message(privateContext->out, currentMsg)) */ + /* { */ + /* /\* errors have already been logged *\/ */ + /* return false; */ + /* } */ + /* } */ + + /* /\* now write the transaction out also to file on-disk *\/ */ + /* if (!stream_write_message(privateContext->sqlFile, currentMsg)) */ + /* { */ + /* /\* errors have already been logged *\/ */ + /* return false; */ + /* } */ if (metadata->action == STREAM_ACTION_COMMIT || metadata->action == STREAM_ACTION_ROLLBACK) @@ -553,7 +744,6 @@ stream_transform_message(StreamContext *privateContext, char *message) return false; } - return true; } @@ -1647,44 +1837,48 @@ AllocateLogicalMessageTuple(LogicalMessageTuple *tuple, int count) /* - * stream_write_message writes the LogicalMessage statement(s) as SQL to the - * already open out stream. + * stream_transform_write_replay_stmt writes the current message to the + * replayDB stmt and replay tables. */ bool -stream_write_message(FILE *out, LogicalMessage *msg) +stream_transform_write_replay_stmt(StreamSpecs *specs) { + StreamContext *privateContext = &(specs->private); + LogicalMessage *msg = &(privateContext->currentMsg); + if (msg->isTransaction) { - return stream_write_transaction(out, &(msg->command.tx)); + if (!stream_transform_write_replay_txn(specs)) + { + /* errors have already been logged */ + return false; + } } else { + ReplayDBStmt replayStmt = { .action = msg->action }; + switch (msg->action) { case STREAM_ACTION_SWITCH: { - if (!stream_write_switchwal(out, &(msg->command.switchwal))) - { - return false; - } + replayStmt.lsn = msg->command.switchwal.lsn; break; } case STREAM_ACTION_KEEPALIVE: { - if (!stream_write_keepalive(out, &(msg->command.keepalive))) - { - return false; - } + replayStmt.lsn = msg->command.keepalive.lsn; + + strlcpy(replayStmt.timestamp, + msg->command.keepalive.timestamp, + sizeof(replayStmt.timestamp)); break; } case STREAM_ACTION_ENDPOS: { - if (!stream_write_endpos(out, &(msg->command.endpos))) - { - return false; - } + replayStmt.lsn = msg->command.endpos.lsn; break; } @@ -1695,110 +1889,54 @@ stream_write_message(FILE *out, LogicalMessage *msg) return false; } } + + if (!ld_store_insert_replay_stmt(specs->replayDB, &replayStmt)) + { + /* errors have already been logged */ + return false; + } } - return true; + return false; } /* - * stream_write_transaction writes the LogicalTransaction statements as SQL to - * the already open out stream. + * stream_transform_write_replay_txn walks through a transaction's list of + * statements and inserts them in the replayDB stmt and replay tables. 
*/ bool -stream_write_transaction(FILE *out, LogicalTransaction *txn) +stream_transform_write_replay_txn(StreamSpecs *specs) { - /* - * Logical decoding also outputs empty transactions that act here kind of - * like a keepalive stream. These transactions might represent activity in - * other databases or background activity in the source Postgres instance - * where the LSN is moving forward. We want to replay them. - */ - if (!txn->continued && txn->count == 0) - { - if (!stream_write_begin(out, txn)) - { - return false; - } + StreamContext *privateContext = &(specs->private); + LogicalMessage *msg = &(privateContext->currentMsg); + LogicalTransaction *txn = &(msg->command.tx); - if (!stream_write_commit(out, txn)) - { - return false; - } + ReplayDBStmt begin = { + .action = STREAM_ACTION_BEGIN, + .xid = txn->xid, + .lsn = txn->beginLSN + }; - return true; - } + strlcpy(begin.timestamp, txn->timestamp, sizeof(begin.timestamp)); - /* - * Now we deal with non-empty transactions. - * - * SWITCH WAL commands might appear eigher in the middle of a transaction - * or in between two transactions, depending on when the LSN WAL file - * switch happens on the source server. - */ - bool sentBEGIN = false; - bool splitTx = false; + if (!ld_store_insert_replay_stmt(specs->replayDB, &begin)) + { + /* errors have already been logged */ + return false; + } LogicalTransactionStatement *currentStmt = txn->first; for (; currentStmt != NULL; currentStmt = currentStmt->next) { + ReplayDBStmt stmt = { .action = currentStmt->action }; + switch (currentStmt->action) { - case STREAM_ACTION_SWITCH: - { - if (sentBEGIN) - { - splitTx = true; - } - - if (!stream_write_switchwal(out, &(currentStmt->stmt.switchwal))) - { - return false; - } - break; - } - - case STREAM_ACTION_KEEPALIVE: - { - if (sentBEGIN) - { - splitTx = true; - } - - if (!stream_write_keepalive(out, &(currentStmt->stmt.keepalive))) - { - return false; - } - break; - } - - case STREAM_ACTION_ENDPOS: - { - if (sentBEGIN) - { - splitTx = true; - } - - if (!stream_write_endpos(out, &(currentStmt->stmt.endpos))) - { - return false; - } - break; - } - case STREAM_ACTION_INSERT: { - if (!sentBEGIN && !txn->continued) - { - if (!stream_write_begin(out, txn)) - { - return false; - } - sentBEGIN = true; - } - - if (!stream_write_insert(out, &(currentStmt->stmt.insert))) + if (!stream_write_insert(&stmt, &(currentStmt->stmt.insert))) { return false; } @@ -1808,16 +1946,7 @@ stream_write_transaction(FILE *out, LogicalTransaction *txn) case STREAM_ACTION_UPDATE: { - if (!sentBEGIN && !txn->continued) - { - if (!stream_write_begin(out, txn)) - { - return false; - } - sentBEGIN = true; - } - - if (!stream_write_update(out, &(currentStmt->stmt.update))) + if (!stream_write_update(&stmt, &(currentStmt->stmt.update))) { return false; } @@ -1826,16 +1955,7 @@ stream_write_transaction(FILE *out, LogicalTransaction *txn) case STREAM_ACTION_DELETE: { - if (!sentBEGIN && !txn->continued) - { - if (!stream_write_begin(out, txn)) - { - return false; - } - sentBEGIN = true; - } - - if (!stream_write_delete(out, &(currentStmt->stmt.delete))) + if (!stream_write_delete(&stmt, &(currentStmt->stmt.delete))) { return false; } @@ -1844,16 +1964,7 @@ stream_write_transaction(FILE *out, LogicalTransaction *txn) case STREAM_ACTION_TRUNCATE: { - if (!sentBEGIN && !txn->continued) - { - if (!stream_write_begin(out, txn)) - { - return false; - } - sentBEGIN = true; - } - - if (!stream_write_truncate(out, &(currentStmt->stmt.truncate))) + if (!stream_write_truncate(&stmt, 
&(currentStmt->stmt.truncate))) { return false; } @@ -1867,171 +1978,37 @@ stream_write_transaction(FILE *out, LogicalTransaction *txn) return false; } } - } - - /* - * Some transactions might be spanning over multiple WAL.{json,sql} files, - * because it just happened at the boundary LSN. In that case we don't want - * to send the COMMIT message yet. - * - * Continued transaction are then represented using several instances of - * our LogicalTransaction data structure, and the last one of the series - * then have the txn->commit metadata forcibly set to true: here we also - * need to obey that. - */ - if ((sentBEGIN && !splitTx) || txn->commit) - { - if (!stream_write_commit(out, txn)) - { - return false; - } - } - if (txn->rollback) - { - if (!stream_write_rollback(out, txn)) + if (!ld_store_insert_replay_stmt(specs->replayDB, &stmt)) { + /* errors have already been logged */ return false; } } - /* flush out stream at transaction boundaries */ - if (fflush(out) != 0) - { - log_error("Failed to flush stream output: %m"); - return false; - } - - return true; -} - + ReplayDBStmt end = { + .action = txn->rollback ? STREAM_ACTION_ROLLBACK : STREAM_ACTION_COMMIT, + .xid = txn->xid, + .lsn = txn->rollback ? txn->rollbackLSN : txn->commitLSN + }; -#define FFORMAT(stream, fmt, ...) \ - { if (fformat(stream, fmt, __VA_ARGS__) == -1) { \ - log_error("Failed to write to stream: %m"); \ - return false; } \ - } + strlcpy(end.timestamp, txn->timestamp, sizeof(end.timestamp)); -/* - * stream_write_begin writes a BEGIN statement to the already open out stream. - */ -bool -stream_write_begin(FILE *out, LogicalTransaction *txn) -{ - /* include commit_lsn only if the transaction has commitLSN */ - if (txn->commitLSN != InvalidXLogRecPtr) + if (!ld_store_insert_replay_stmt(specs->replayDB, &end)) { - FFORMAT(out, - "%s{\"xid\":%lld,\"lsn\":\"%X/%X\",\"timestamp\":\"%s\",\"commit_lsn\":\"%X/%X\"}\n", - OUTPUT_BEGIN, - (long long) txn->xid, - LSN_FORMAT_ARGS(txn->beginLSN), - txn->timestamp, - LSN_FORMAT_ARGS(txn->commitLSN)); - } - else - { - FFORMAT(out, - "%s{\"xid\":%lld,\"lsn\":\"%X/%X\",\"timestamp\":\"%s\"}\n", - OUTPUT_BEGIN, - (long long) txn->xid, - LSN_FORMAT_ARGS(txn->beginLSN), - txn->timestamp); + /* errors have already been logged */ + return false; } - /* keep compiler happy */ return true; } /* - * stream_write_rollback writes a COMMIT statement to the already open out - * stream. + * stream_write_insert writes an INSERT statement. */ -bool -stream_write_rollback(FILE *out, LogicalTransaction *txn) -{ - FFORMAT(out, - "%s{\"xid\":%lld,\"lsn\":\"%X/%X\",\"timestamp\":\"%s\"}\n", - OUTPUT_ROLLBACK, - (long long) txn->xid, - LSN_FORMAT_ARGS(txn->rollbackLSN), - txn->timestamp); - - return true; -} - - -/* - * stream_write_commit writes a COMMIT statement to the already open out - * stream. - */ -bool -stream_write_commit(FILE *out, LogicalTransaction *txn) -{ - FFORMAT(out, - "%s{\"xid\":%lld,\"lsn\":\"%X/%X\",\"timestamp\":\"%s\"}\n", - OUTPUT_COMMIT, - (long long) txn->xid, - LSN_FORMAT_ARGS(txn->commitLSN), - txn->timestamp); - - return true; -} - - -/* - * stream_write_switchwal writes a SWITCH statement to the already open out - * stream. - */ -bool -stream_write_switchwal(FILE *out, LogicalMessageSwitchWAL *switchwal) -{ - FFORMAT(out, "%s{\"lsn\":\"%X/%X\"}\n", - OUTPUT_SWITCHWAL, - LSN_FORMAT_ARGS(switchwal->lsn)); - - return true; -} - - -/* - * stream_write_keepalive writes a KEEPALIVE statement to the already open out - * stream. 
- */ -bool -stream_write_keepalive(FILE *out, LogicalMessageKeepalive *keepalive) -{ - FFORMAT(out, "%s{\"lsn\":\"%X/%X\",\"timestamp\":\"%s\"}\n", - OUTPUT_KEEPALIVE, - LSN_FORMAT_ARGS(keepalive->lsn), - keepalive->timestamp); - - return true; -} - - -/* - * stream_write_endpos writes a SWITCH statement to the already open out - * stream. - */ -bool -stream_write_endpos(FILE *out, LogicalMessageEndpos *endpos) -{ - FFORMAT(out, "%s{\"lsn\":\"%X/%X\"}\n", - OUTPUT_ENDPOS, - LSN_FORMAT_ARGS(endpos->lsn)); - - return true; -} - - -/* - * stream_write_insert writes an INSERT statement to the already open out - * stream. - */ -bool -stream_write_insert(FILE *out, LogicalMessageInsert *insert) +static bool +stream_write_insert(ReplayDBStmt *replayStmt, LogicalMessageInsert *insert) { /* loop over INSERT statements targeting the same table */ for (int s = 0; s < insert->new.count; s++) @@ -2131,19 +2108,15 @@ stream_write_insert(FILE *out, LogicalMessageInsert *insert) return false; } + /* compute the hash and prepare the JSONB data array */ uint32_t hash = hashlittle(buf->data, buf->len, 5381); - - FFORMAT(out, "PREPARE %x AS %s;\n", hash, buf->data); - - destroyPQExpBuffer(buf); - - /* - * Second, the EXECUTE part. - */ char *serialized_string = json_serialize_to_string(js); - FFORMAT(out, "EXECUTE %x%s;\n", hash, serialized_string); + replayStmt->hash = hash; + replayStmt->stmt = strdup(buf->data); + replayStmt->data = strdup(serialized_string); + destroyPQExpBuffer(buf); json_free_serialized_string(serialized_string); } @@ -2152,11 +2125,10 @@ stream_write_insert(FILE *out, LogicalMessageInsert *insert) /* - * stream_write_update writes an UPDATE statement to the already open out - * stream. + * stream_write_update writes an UPDATE statement. */ -bool -stream_write_update(FILE *out, LogicalMessageUpdate *update) +static bool +stream_write_update(ReplayDBStmt *replayStmt, LogicalMessageUpdate *update) { if (update->old.count != update->new.count) { @@ -2304,8 +2276,8 @@ stream_write_update(FILE *out, LogicalMessageUpdate *update) if (value->isNull) { /* - * Attributes with the value `NULL` require `IS NULL` instead of `=` - * in the WHERE clause. + * Attributes with the value `NULL` require `IS NULL` + * instead of `=` in the WHERE clause. */ appendPQExpBuffer(buf, "%s%s IS NULL", v > 0 ? " and " : "", @@ -2330,24 +2302,20 @@ stream_write_update(FILE *out, LogicalMessageUpdate *update) if (PQExpBufferBroken(buf)) { - log_error("Failed to transform INSERT statement: Out of Memory"); + log_error("Failed to transform UPDATE statement: Out of Memory"); destroyPQExpBuffer(buf); return false; } + /* compute the hash and prepare the JSONB data array */ uint32_t hash = hashlittle(buf->data, buf->len, 5381); - - FFORMAT(out, "PREPARE %x AS %s;\n", hash, buf->data); - - destroyPQExpBuffer(buf); - - /* - * Second, the EXECUTE part. - */ char *serialized_string = json_serialize_to_string(js); - FFORMAT(out, "EXECUTE %x%s;\n", hash, serialized_string); + replayStmt->hash = hash; + replayStmt->stmt = strdup(buf->data); + replayStmt->data = strdup(serialized_string); + destroyPQExpBuffer(buf); json_free_serialized_string(serialized_string); } @@ -2356,11 +2324,10 @@ stream_write_update(FILE *out, LogicalMessageUpdate *update) /* - * stream_write_delete writes an DELETE statement to the already open out - * stream. + * stream_write_delete writes an DELETE statement. 
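
The stmt/stmt_args split mirrors the PREPARE/EXECUTE pair that the old SQL
files carried: the hashlittle() hash names the statement, stmt holds the SQL
text with its placeholders, and stmt_args holds the serialized array of
argument values. As a rough sketch of the apply side, which this patch does not
implement, the hash can double as the libpq prepared-statement name;
unpack_json_args() below is a hypothetical helper that would decode the
stmt_args array into C strings.

    /*
     * Sketch only: apply one replay row with a server-side prepared statement.
     * unpack_json_args() is hypothetical and not part of pgcopydb.
     */
    #include <stdio.h>
    #include <stdbool.h>
    #include <libpq-fe.h>

    extern int unpack_json_args(const char *json, const char ***values);

    static bool
    apply_replay_row(PGconn *conn, const char *hash,
                     const char *sql, const char *jsonArgs)
    {
        const char **values = NULL;
        int nParams = unpack_json_args(jsonArgs, &values);

        if (nParams < 0)
        {
            return false;
        }

        /* PREPARE <hash> AS <sql>; a real implementation caches this per hash */
        PGresult *res = PQprepare(conn, hash, sql, nParams, NULL);

        if (PQresultStatus(res) != PGRES_COMMAND_OK)
        {
            fprintf(stderr, "PREPARE %s: %s", hash, PQerrorMessage(conn));
            PQclear(res);
            return false;
        }

        PQclear(res);

        /* execute the prepared statement with the decoded arguments, as text */
        res = PQexecPrepared(conn, hash, nParams, values, NULL, NULL, 0);

        bool done = PQresultStatus(res) == PGRES_COMMAND_OK;

        if (!done)
        {
            fprintf(stderr, "EXECUTE %s: %s", hash, PQerrorMessage(conn));
        }

        PQclear(res);

        return done;
    }
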
*/ -bool -stream_write_delete(FILE *out, LogicalMessageDelete *delete) +static bool +stream_write_delete(ReplayDBStmt *replayStmt, LogicalMessageDelete *delete) { /* loop over DELETE statements targeting the same table */ for (int s = 0; s < delete->old.count; s++) @@ -2403,8 +2370,8 @@ stream_write_delete(FILE *out, LogicalMessageDelete *delete) if (value->isNull) { /* - * Attributes with the value `NULL` require `IS NULL` instead of `=` - * in the WHERE clause. + * Attributes with the value `NULL` require `IS NULL` + * instead of `=` in the WHERE clause. */ appendPQExpBuffer(buf, "%s%s IS NULL", v > 0 ? " and " : "", @@ -2427,19 +2394,22 @@ stream_write_delete(FILE *out, LogicalMessageDelete *delete) } } - uint32_t hash = hashlittle(buf->data, buf->len, 5381); - - FFORMAT(out, "PREPARE %x AS %s;\n", hash, buf->data); - - destroyPQExpBuffer(buf); + if (PQExpBufferBroken(buf)) + { + log_error("Failed to transform DELETE statement: Out of Memory"); + destroyPQExpBuffer(buf); + return false; + } - /* - * Second, the EXECUTE part. - */ + /* compute the hash and prepare the JSONB data array */ + uint32_t hash = hashlittle(buf->data, buf->len, 5381); char *serialized_string = json_serialize_to_string(js); - FFORMAT(out, "EXECUTE %x%s;\n", hash, serialized_string); + replayStmt->hash = hash; + replayStmt->stmt = strdup(buf->data); + replayStmt->data = strdup(serialized_string); + destroyPQExpBuffer(buf); json_free_serialized_string(serialized_string); } @@ -2448,16 +2418,31 @@ stream_write_delete(FILE *out, LogicalMessageDelete *delete) /* - * stream_write_truncate writes an TRUNCATE statement to the already open out - * stream. + * stream_write_truncate writes an TRUNCATE statement. */ -bool -stream_write_truncate(FILE *out, LogicalMessageTruncate *truncate) +static bool +stream_write_truncate(ReplayDBStmt *replayStmt, LogicalMessageTruncate *truncate) { - FFORMAT(out, - "TRUNCATE ONLY %s.%s\n", - truncate->table.nspname, - truncate->table.relname); + PQExpBuffer buf = createPQExpBuffer(); + + printfPQExpBuffer(buf, "TRUNCATE ONLY %s.%s\n", + truncate->table.nspname, + truncate->table.relname); + + if (PQExpBufferBroken(buf)) + { + log_error("Failed to transform TRUNCATE statement: Out of Memory"); + destroyPQExpBuffer(buf); + return false; + } + + /* compute the hash and prepare the JSONB data array */ + uint32_t hash = hashlittle(buf->data, buf->len, 5381); + + replayStmt->hash = hash; + replayStmt->stmt = strdup(buf->data); + + destroyPQExpBuffer(buf); return true; } @@ -2540,73 +2525,6 @@ stream_add_value_in_json_array(LogicalMessageValue *value, JSON_Array *jsArray) } -/* - * stream_write_sql_escape_string_constant writes given str to out and follows - * the Postgres syntax for String Constants With C-Style Escapes, as documented - * at the following URL: - * - * https://www.postgresql.org/docs/current/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS - */ -bool -stream_write_sql_escape_string_constant(FILE *out, const char *str) -{ - FFORMAT(out, "%s", "E'"); - - for (int i = 0; str[i] != '\0'; i++) - { - switch (str[i]) - { - case '\b': - { - FFORMAT(out, "%s", "\\b"); - break; - } - - case '\f': - { - FFORMAT(out, "%s", "\\f"); - break; - } - - case '\n': - { - FFORMAT(out, "%s", "\\n"); - break; - } - - case '\r': - { - FFORMAT(out, "%s", "\\r"); - break; - } - - case '\t': - { - FFORMAT(out, "%s", "\\t"); - break; - } - - case '\'': - case '\\': - { - FFORMAT(out, "\\%c", str[i]); - break; - } - - default: - { - FFORMAT(out, "%c", str[i]); - break; - } - } - } - - FFORMAT(out, "%s", "'"); - 
- return true; -} - - /* * LogicalMessageValueEq compares two LogicalMessageValue instances and return * true when they represent the same value. NULL are considered Equal, like in diff --git a/src/bin/pgcopydb/pgsql.c b/src/bin/pgcopydb/pgsql.c index 72d3a5acb..3c1a1dad3 100644 --- a/src/bin/pgcopydb/pgsql.c +++ b/src/bin/pgcopydb/pgsql.c @@ -3629,6 +3629,7 @@ pg_copy_large_object(PGSQL *src, bool pgsql_init_stream(LogicalStreamClient *client, const char *pguri, + const char *cdcPathDir, StreamOutputPlugin plugin, const char *slotName, XLogRecPtr startpos, @@ -3645,6 +3646,8 @@ pgsql_init_stream(LogicalStreamClient *client, /* we're going to send several replication commands */ pgsql->connectionStatementType = PGSQL_CONNECTION_MULTI_STATEMENT; + strlcpy(client->cdcPathDir, cdcPathDir, sizeof(client->cdcPathDir)); + client->plugin = plugin; strlcpy(client->slotName, slotName, sizeof(client->slotName)); @@ -3752,6 +3755,13 @@ pgsql_create_logical_replication_slot(LogicalStreamClient *client, return false; } + /* fetch the source timeline */ + if (!pgsql_identify_system(pgsql, &(client->system), client->cdcPathDir)) + { + /* errors have already been logged */ + return false; + } + PGresult *result = PQexec(pgsql->connection, query); if (PQresultStatus(result) != PGRES_TUPLES_OK) diff --git a/src/bin/pgcopydb/pgsql.h b/src/bin/pgcopydb/pgsql.h index 0ec680c95..920d94c47 100644 --- a/src/bin/pgcopydb/pgsql.h +++ b/src/bin/pgcopydb/pgsql.h @@ -496,6 +496,7 @@ typedef struct LogicalStreamClient bool pgsql_init_stream(LogicalStreamClient *client, const char *pguri, + const char *cdcPathDir, StreamOutputPlugin plugin, const char *slotName, XLogRecPtr startpos, diff --git a/src/bin/pgcopydb/pgsql_timeline.c b/src/bin/pgcopydb/pgsql_timeline.c index 8fbc0686f..1ddca6d51 100644 --- a/src/bin/pgcopydb/pgsql_timeline.c +++ b/src/bin/pgcopydb/pgsql_timeline.c @@ -283,10 +283,13 @@ parse_timeline_history_file(char *filename, }; /* step 2: iterate over the file */ - if (!file_iter_lines(filename, BUFSIZE, &context, register_timeline_hook)) + if (currentTimeline > 1) { - /* errors have already been logged */ - return false; + if (!file_iter_lines(filename, BUFSIZE, &context, register_timeline_hook)) + { + /* errors have already been logged */ + return false; + } } /* step 3: add the current timeline to catalog */ diff --git a/src/bin/pgcopydb/snapshot.c b/src/bin/pgcopydb/snapshot.c index 314bd54cb..4f1720a50 100644 --- a/src/bin/pgcopydb/snapshot.c +++ b/src/bin/pgcopydb/snapshot.c @@ -9,6 +9,7 @@ #include "copydb.h" #include "ld_stream.h" #include "log.h" +#include "pgsql_timeline.h" /* * copydb_copy_snapshot initializes a new TransactionSnapshot from another @@ -375,7 +376,8 @@ copydb_should_export_snapshot(CopyDataSpec *copySpecs) bool copydb_create_logical_replication_slot(CopyDataSpec *copySpecs, const char *logrep_pguri, - ReplicationSlot *slot) + ReplicationSlot *slot, + char *cdcPathDir) { TransactionSnapshot *sourceSnapshot = &(copySpecs->sourceSnapshot); @@ -416,6 +418,7 @@ copydb_create_logical_replication_slot(CopyDataSpec *copySpecs, if (!pgsql_init_stream(stream, logrep_pguri, + cdcPathDir, slot->plugin, slot->slotName, InvalidXLogRecPtr, @@ -425,13 +428,6 @@ copydb_create_logical_replication_slot(CopyDataSpec *copySpecs, return false; } - /* fetch the source timeline */ - if (!pgsql_identify_system(&(stream->pgsql), &(stream->system))) - { - /* errors have already been logged */ - return false; - } - /* now create the replication slot, exporting the snapshot */ if 
(!pgsql_create_logical_replication_slot(stream, slot)) { diff --git a/tests/cdc-low-level/copydb.sh b/tests/cdc-low-level/copydb.sh index d00d68128..bddab2ddb 100755 --- a/tests/cdc-low-level/copydb.sh +++ b/tests/cdc-low-level/copydb.sh @@ -53,11 +53,23 @@ SQLFILE=000000010000000000000002.sql # different at each run expected=/tmp/expected.json result=/tmp/result.json +messages=/tmp/messages.json -JQSCRIPT='del(.lsn) | del(.nextlsn) | del(.timestamp) | del(.xid) | if has("message") then .message |= sub("(?COMMIT|BEGIN) [0-9]+"; "\(.m) XXX") else . end' +ls -l ${SHAREDIR} + +# make a JSON file from our SQLite database, for testing +sqlite3 ${SHAREDIR}/*.db < ${expected} -jq "${JQSCRIPT}" ${SHAREDIR}/${WALFILE} > ${result} +jq "${JQSCRIPT}" ${messages} > ${result} # first command to provide debug information, second to stop when returns non-zero diff ${expected} ${result} || cat ${SHAREDIR}/${WALFILE} From e19f2223b98a43c0b0647c3239cd76af0d92ae4c Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Thu, 12 Sep 2024 18:26:31 +0200 Subject: [PATCH 5/8] Refactor ld_transform with the ld_store idea. --- src/bin/pgcopydb/catalog.c | 20 +- src/bin/pgcopydb/cli_sentinel.c | 18 +- src/bin/pgcopydb/ld_apply.c | 2 + src/bin/pgcopydb/ld_store.c | 625 ++++++++++++++++++++++++-------- src/bin/pgcopydb/ld_store.h | 17 +- src/bin/pgcopydb/ld_stream.c | 89 ++++- src/bin/pgcopydb/ld_stream.h | 12 + src/bin/pgcopydb/ld_transform.c | 400 +++++++++++++------- src/bin/pgcopydb/ld_wal2json.c | 16 +- src/bin/pgcopydb/sentinel.c | 5 +- tests/follow-wal2json/copydb.sh | 14 +- 11 files changed, 904 insertions(+), 314 deletions(-) diff --git a/src/bin/pgcopydb/catalog.c b/src/bin/pgcopydb/catalog.c index 685786e3c..543405773 100644 --- a/src/bin/pgcopydb/catalog.c +++ b/src/bin/pgcopydb/catalog.c @@ -423,18 +423,18 @@ static char *targetDBcreateDDLs[] = { static char *replayDBcreateDDLs[] = { "create table output(" " id integer primary key, " - " action text, xid integer, lsn pg_lsn, timestamp text, " + " action text, xid integer, lsn integer, timestamp text, " " message text)", "create unique index o_a_lsn on output(action, lsn)", - "create unique index o_a_xid on output(action, xid)", + "create index o_a_xid on output(action, xid)", "create table stmt(hash text primary key, sql text)", "create unique index stmt_hash on stmt(hash)", "create table replay(" " id integer primary key, " - " action text, xid integer, lsn pg_lsn, timestamp text, " + " action text, xid integer, lsn integer, endlsn integer, timestamp text, " " stmt_hash text references stmt(hash), stmt_args jsonb)", "create index r_xid on replay(xid)", @@ -8174,7 +8174,19 @@ catalog_sql_execute(SQLiteQuery *query) if (rc != SQLITE_ROW) { log_error("Failed to step through statement: %s", query->sql); - log_error("[SQLite %d] %s", rc, sqlite3_errstr(rc)); + + int offset = sqlite3_error_offset(query->db); + + if (offset != -1) + { + /* "Failed to step through statement: %s" is 34 chars of prefix */ + log_error("%34s%*s^", " ", offset, " "); + } + + log_error("[SQLite %d: %s]: %s", + rc, + sqlite3_errstr(rc), + sqlite3_errmsg(query->db)); (void) sqlite3_clear_bindings(query->ppStmt); (void) sqlite3_finalize(query->ppStmt); diff --git a/src/bin/pgcopydb/cli_sentinel.c b/src/bin/pgcopydb/cli_sentinel.c index 8fef8f8e1..1f34ae405 100644 --- a/src/bin/pgcopydb/cli_sentinel.c +++ b/src/bin/pgcopydb/cli_sentinel.c @@ -663,6 +663,7 @@ cli_sentinel_get(int argc, char **argv) char startpos[PG_LSN_MAXLENGTH] = { 0 }; char endpos[PG_LSN_MAXLENGTH] = { 0 }; char 
write_lsn[PG_LSN_MAXLENGTH] = { 0 }; + char transform_lsn[PG_LSN_MAXLENGTH] = { 0 }; char flush_lsn[PG_LSN_MAXLENGTH] = { 0 }; char replay_lsn[PG_LSN_MAXLENGTH] = { 0 }; @@ -672,6 +673,8 @@ cli_sentinel_get(int argc, char **argv) LSN_FORMAT_ARGS(sentinel.endpos)); sformat(write_lsn, PG_LSN_MAXLENGTH, "%X/%X", LSN_FORMAT_ARGS(sentinel.write_lsn)); + sformat(transform_lsn, PG_LSN_MAXLENGTH, "%X/%X", + LSN_FORMAT_ARGS(sentinel.transform_lsn)); sformat(flush_lsn, PG_LSN_MAXLENGTH, "%X/%X", LSN_FORMAT_ARGS(sentinel.flush_lsn)); sformat(replay_lsn, PG_LSN_MAXLENGTH, "%X/%X", @@ -681,6 +684,7 @@ cli_sentinel_get(int argc, char **argv) json_object_set_string(jsobj, "endpos", startpos); json_object_set_boolean(jsobj, "apply", sentinel.apply); json_object_set_string(jsobj, "write_lsn", write_lsn); + json_object_set_string(jsobj, "transform_lsn", transform_lsn); json_object_set_string(jsobj, "flush_lsn", flush_lsn); json_object_set_string(jsobj, "replay_lsn", replay_lsn); @@ -692,17 +696,19 @@ cli_sentinel_get(int argc, char **argv) } else { - fformat(stdout, "%-10s %X/%X\n", "startpos", + fformat(stdout, "%-15s %X/%X\n", "startpos", LSN_FORMAT_ARGS(sentinel.startpos)); - fformat(stdout, "%-10s %X/%X\n", "endpos", + fformat(stdout, "%-15s %X/%X\n", "endpos", LSN_FORMAT_ARGS(sentinel.endpos)); - fformat(stdout, "%-10s %s\n", "apply", + fformat(stdout, "%-15s %s\n", "apply", sentinel.apply ? "enabled" : "disabled"); - fformat(stdout, "%-10s %X/%X\n", "write_lsn", + fformat(stdout, "%-15s %X/%X\n", "write_lsn", LSN_FORMAT_ARGS(sentinel.write_lsn)); - fformat(stdout, "%-10s %X/%X\n", "flush_lsn", + fformat(stdout, "%-15s %X/%X\n", "transform_lsn", + LSN_FORMAT_ARGS(sentinel.transform_lsn)); + fformat(stdout, "%-15s %X/%X\n", "flush_lsn", LSN_FORMAT_ARGS(sentinel.flush_lsn)); - fformat(stdout, "%-10s %X/%X\n", "replay_lsn", + fformat(stdout, "%-15s %X/%X\n", "replay_lsn", LSN_FORMAT_ARGS(sentinel.replay_lsn)); } } diff --git a/src/bin/pgcopydb/ld_apply.c b/src/bin/pgcopydb/ld_apply.c index ef63c7e12..cd0570efa 100644 --- a/src/bin/pgcopydb/ld_apply.c +++ b/src/bin/pgcopydb/ld_apply.c @@ -65,6 +65,8 @@ stream_apply_catchup(StreamSpecs *specs) { StreamApplyContext context = { 0 }; + return true; + if (!stream_apply_setup(specs, &context)) { log_error("Failed to setup for catchup, see above for details"); diff --git a/src/bin/pgcopydb/ld_store.c b/src/bin/pgcopydb/ld_store.c index e48b31e86..3876416d9 100644 --- a/src/bin/pgcopydb/ld_store.c +++ b/src/bin/pgcopydb/ld_store.c @@ -177,7 +177,8 @@ ld_store_set_cdc_filename_at_lsn(StreamSpecs *specs, uint64_t lsn) { if (lsn == InvalidXLogRecPtr) { - return ld_store_set_first_cdc_filename(specs); + log_debug("ld_store_set_cdc_filename_at_lsn: 0/0"); + return true; } sqlite3 *db = specs->sourceDB->db; @@ -254,7 +255,16 @@ ld_store_set_cdc_filename_at_lsn(StreamSpecs *specs, uint64_t lsn) } /* now check if the candidateDB contains the given LSN */ - if (ld_store_lookup_lsn(candidateDB, lsn)) + ReplayDBOutputMessage output = { 0 }; + + if (!ld_store_lookup_output_at_lsn(candidateDB, lsn, &output)) + { + /* errors have already been logged */ + return false; + } + + /* found it? then we opened the right replay db file */ + if (output.lsn == lsn) { specs->replayDB = candidateDB; break; @@ -278,25 +288,57 @@ ld_store_set_cdc_filename_at_lsn(StreamSpecs *specs, uint64_t lsn) /* - * ld_store_set_cdc_filename_at_lsn sets our replayDB dbfile value to the first - * CDC file that we have in the catalogs. + * ld_store_cdc_filename_fetch is a SQLiteQuery callback. 
*/ bool -ld_store_set_first_cdc_filename(StreamSpecs *specs) +ld_store_cdc_filename_fetch(SQLiteQuery *query) { - sqlite3 *db = specs->sourceDB->db; + char *filename = (char *) query->context; + + if (sqlite3_column_type(query->ppStmt, 0) == SQLITE_NULL) + { + query->context = NULL; + } + else + { + strlcpy(filename, + (char *) sqlite3_column_text(query->ppStmt, 0), + MAXPGPATH); + } + + return true; +} + + +/* + * ld_store_lookup_output_at_lsn searches the given LSN in the given replayDB + * database. + */ +bool +ld_store_lookup_output_at_lsn(DatabaseCatalog *catalog, uint64_t lsn, + ReplayDBOutputMessage *output) +{ + sqlite3 *db = catalog->db; if (db == NULL) { - log_error("BUG: ld_store_first_cdc_filename: db is NULL"); + log_error("BUG: ld_store_lookup_output_at_lsn: db is NULL"); return false; } - char *sql = "select filename from cdc_files order by id limit 1"; + char *sql = + " select id, action, xid, lsn, timestamp, message " + " from output " + " where lsn = $1 " + "order by id " + " limit 1"; + + log_warn("ld_store_lookup_output_at_lsn: %X/%X", LSN_FORMAT_ARGS(lsn)); SQLiteQuery query = { - .context = specs->replayDB->dbfile, - .fetchFunction = &ld_store_cdc_filename_fetch + .errorOnZeroRows = true, + .context = output, + .fetchFunction = &ld_store_output_fetch }; if (!catalog_sql_prepare(db, sql, &query)) @@ -305,7 +347,18 @@ ld_store_set_first_cdc_filename(StreamSpecs *specs) return false; } - /* now execute the query, which does not return any row */ + /* bind our parameters now */ + BindParam params[1] = { + { BIND_PARAMETER_TYPE_INT64, "lsn", lsn }, + }; + + if (!catalog_sql_bind(&query, params, 1)) + { + /* errors have already been logged */ + return false; + } + + /* now execute the query, which return exactly one row */ if (!catalog_sql_execute_once(&query)) { /* errors have already been logged */ @@ -317,22 +370,76 @@ ld_store_set_first_cdc_filename(StreamSpecs *specs) /* - * ld_store_cdc_filename_fetch is a SQLiteQuery callback. + * ld_store_lookup_output_after_lsn searches the first message following the + * given LSN in the replayDB database. + * + * The same LSN would typically be used in Postgres for a COMMIT message and + * the BEGIN message of the following transaction, so we search for a message + * with an lsn greater than or equal to the given one, and a message that's + * neither a COMMIT nor a ROLLBACK. 
+ * + * {"action":"C","xid":"499","lsn":"0/24E1B08"} + * {"action":"B","xid":"500","lsn":"0/24E1B08"} */ bool -ld_store_cdc_filename_fetch(SQLiteQuery *query) +ld_store_lookup_output_after_lsn(DatabaseCatalog *catalog, + uint64_t lsn, + ReplayDBOutputMessage *output) { - char *filename = (char *) query->context; + sqlite3 *db = catalog->db; - if (sqlite3_column_type(query->ppStmt, 0) == SQLITE_NULL) + if (db == NULL) { - query->context = NULL; + log_error("BUG: ld_store_lookup_output_after_lsn: db is NULL"); + return false; } - else + + char *sql = + " select id, action, xid, lsn, timestamp, message " + " from output " + + /* only a BEGIN action is expected to have the same LSN again */ + " where lsn >= $1 and action = 'B' " + " union all " + " select id, action, xid, lsn, timestamp, message " + " from output " + " where lsn > $2 " + "order by id " + " limit 1"; + + log_warn("ld_store_lookup_output_after_lsn: %X/%X", LSN_FORMAT_ARGS(lsn)); + + SQLiteQuery query = { + .errorOnZeroRows = false, + .context = output, + .fetchFunction = &ld_store_output_fetch + }; + + if (!catalog_sql_prepare(db, sql, &query)) { - strlcpy(filename, - (char *) sqlite3_column_text(query->ppStmt, 0), - MAXPGPATH); + /* errors have already been logged */ + return false; + } + + /* bind our parameters now */ + BindParam params[] = { + { BIND_PARAMETER_TYPE_INT64, "lsn", lsn }, + { BIND_PARAMETER_TYPE_INT64, "lsn", lsn } + }; + + int count = sizeof(params) / sizeof(params[0]); + + if (!catalog_sql_bind(&query, params, count)) + { + /* errors have already been logged */ + return false; + } + + /* now execute the query, which return exactly one row */ + if (!catalog_sql_execute_once(&query)) + { + /* errors have already been logged */ + return false; } return true; @@ -340,28 +447,37 @@ ld_store_cdc_filename_fetch(SQLiteQuery *query) /* - * ld_store_lookup_lsn searches the given LSN in the given replayDB database. + * ld_store_lookup_output_xid_end searches the last message for the given + * transaction (xid) in the replayDB database. */ bool -ld_store_lookup_lsn(DatabaseCatalog *catalog, uint64_t lsn) +ld_store_lookup_output_xid_end(DatabaseCatalog *catalog, + uint32_t xid, + ReplayDBOutputMessage *output) { sqlite3 *db = catalog->db; if (db == NULL) { - log_error("BUG: ld_store_lookup_lsn: db is NULL"); + log_error("BUG: ld_store_lookup_output_xid_end: db is NULL"); return false; } - char *sql = "select 1 from output where lsn = $1 limit 1"; + char *sql = + " select id, action, xid, lsn, timestamp, message " + " from output " + " where xid = $1 and (action = 'C' or action = 'R') " + "order by id " + " limit 1"; + + log_warn("ld_store_lookup_output_xid_end: %u", xid); SQLiteQuery query = { - .errorOnZeroRows = true + .errorOnZeroRows = true, + .context = output, + .fetchFunction = &ld_store_output_fetch }; - char pg_lsn[PG_LSN_MAXLENGTH] = { 0 }; - sformat(pg_lsn, sizeof(pg_lsn), "%X/%X", LSN_FORMAT_ARGS(lsn)); - if (!catalog_sql_prepare(db, sql, &query)) { /* errors have already been logged */ @@ -370,7 +486,7 @@ ld_store_lookup_lsn(DatabaseCatalog *catalog, uint64_t lsn) /* bind our parameters now */ BindParam params[1] = { - { BIND_PARAMETER_TYPE_TEXT, "lsn", 0, (char *) pg_lsn }, + { BIND_PARAMETER_TYPE_INT64, "xid", xid }, }; if (!catalog_sql_bind(&query, params, 1)) @@ -390,6 +506,82 @@ ld_store_lookup_lsn(DatabaseCatalog *catalog, uint64_t lsn) } +/* + * ld_store_output_fetch fetches a ReplayDBOutputMessage entry from a SQLite + * ppStmt result set. 
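
Storing LSNs as plain 64-bit integers (bound with BIND_PARAMETER_TYPE_INT64)
instead of pg_lsn text keeps range predicates such as "where lsn >= $1" simple
and correctly ordered in SQLite. Converting between that integer and the usual
%X/%X notation is the standard Postgres split into high and low 32-bit halves;
a small standalone sketch, not pgcopydb code:

    /* Sketch only: convert an LSN between uint64 and its textual X/X form. */
    #include <stdio.h>
    #include <stdint.h>

    static int
    lsn_from_string(const char *str, uint64_t *lsn)
    {
        unsigned int hi = 0;
        unsigned int lo = 0;

        if (sscanf(str, "%X/%X", &hi, &lo) != 2)
        {
            return -1;
        }

        *lsn = ((uint64_t) hi << 32) | lo;

        return 0;
    }

    static void
    lsn_to_string(uint64_t lsn, char *buf, size_t size)
    {
        /* same split as the Postgres LSN_FORMAT_ARGS() macro */
        snprintf(buf, size, "%X/%X",
                 (unsigned int) (lsn >> 32),
                 (unsigned int) (lsn & 0xffffffff));
    }
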
+ */ +bool +ld_store_output_fetch(SQLiteQuery *query) +{ + ReplayDBOutputMessage *output = (ReplayDBOutputMessage *) query->context; + + /* cleanup the memory area before re-use */ + bzero(output, sizeof(ReplayDBOutputMessage)); + + output->id = sqlite3_column_int64(query->ppStmt, 0); + + if (sqlite3_column_type(query->ppStmt, 1) != SQLITE_NULL) + { + char *action = (char *) sqlite3_column_text(query->ppStmt, 1); + output->action = action[0]; + } + else + { + log_error("Failed to fetch action for output id %lld", + (long long) output->id); + return false; + } + + if (sqlite3_column_type(query->ppStmt, 2) != SQLITE_NULL) + { + output->xid = sqlite3_column_int64(query->ppStmt, 2); + } + + /* lsn could be NULL */ + output->lsn = InvalidXLogRecPtr; + + if (sqlite3_column_type(query->ppStmt, 3) != SQLITE_NULL) + { + output->lsn = sqlite3_column_int64(query->ppStmt, 3); + } + + log_warn("ld_store_output_fetch: %lld %c %u %X/%X", + (long long) output->id, + output->action, + output->xid, + LSN_FORMAT_ARGS(output->lsn)); + + /* timestamp */ + if (sqlite3_column_type(query->ppStmt, 4) != SQLITE_NULL) + { + strlcpy(output->timestamp, + (char *) sqlite3_column_text(query->ppStmt, 4), + sizeof(output->timestamp)); + } + + /* message */ + if (sqlite3_column_type(query->ppStmt, 5) != SQLITE_NULL) + { + int len = sqlite3_column_bytes(query->ppStmt, 5); + int bytes = len + 1; + + output->jsonBuffer = (char *) calloc(bytes, sizeof(char)); + + if (output->jsonBuffer == NULL) + { + log_fatal(ALLOCATION_FAILED_ERROR); + return false; + } + + strlcpy(output->jsonBuffer, + (char *) sqlite3_column_text(query->ppStmt, 5), + bytes); + } + + return true; +} + + /* * ld_store_insert_cdc_filename inserts a new entry to the streaming * table of replaydb filename with metadata. @@ -530,8 +722,14 @@ ld_store_insert_message(DatabaseCatalog *catalog, } char *sql = - "insert or replace into output(action, xid, lsn, timestamp, message)" - "values($1, $2, $3, $4, $5)"; + "insert into output(action, xid, lsn, timestamp, message)" + "values($1, $2, $3, $4, $5) "; + + if (!semaphore_lock(&(catalog->sema))) + { + /* errors have already been logged */ + return false; + } SQLiteQuery query = { 0 }; @@ -546,15 +744,12 @@ ld_store_insert_message(DatabaseCatalog *catalog, metadata->xid == 0 ? 
BIND_PARAMETER_TYPE_NULL : BIND_PARAMETER_TYPE_INT64; char action[2] = { metadata->action, '\0' }; - char lsn[PG_LSN_MAXLENGTH] = { 0 }; - - sformat(lsn, sizeof(lsn), "%X/%X", LSN_FORMAT_ARGS(metadata->lsn)); /* bind our parameters now */ BindParam params[] = { { BIND_PARAMETER_TYPE_TEXT, "action", 0, action }, { xidParamType, "xid", metadata->xid, NULL }, - { BIND_PARAMETER_TYPE_TEXT, "lsn", 0, lsn }, + { BIND_PARAMETER_TYPE_INT64, "lsn", metadata->lsn, NULL }, { BIND_PARAMETER_TYPE_TEXT, "timestamp", 0, metadata->timestamp }, { BIND_PARAMETER_TYPE_TEXT, "message", 0, metadata->jsonBuffer } }; @@ -564,6 +759,7 @@ ld_store_insert_message(DatabaseCatalog *catalog, if (!catalog_sql_bind(&query, params, count)) { /* errors have already been logged */ + (void) semaphore_unlock(&(catalog->sema)); return false; } @@ -571,9 +767,12 @@ ld_store_insert_message(DatabaseCatalog *catalog, if (!catalog_sql_execute_once(&query)) { /* errors have already been logged */ + (void) semaphore_unlock(&(catalog->sema)); return false; } + (void) semaphore_unlock(&(catalog->sema)); + return true; } @@ -598,6 +797,16 @@ ld_store_insert_internal_message(DatabaseCatalog *catalog, "insert or replace into output(action, lsn, timestamp)" "values($1, $2, $3)"; + log_warn("ld_store_insert_internal_message: %c %X/%X", + message->action, + LSN_FORMAT_ARGS(message->lsn)); + + if (!semaphore_lock(&(catalog->sema))) + { + /* errors have already been logged */ + return false; + } + SQLiteQuery query = { 0 }; if (!catalog_sql_prepare(db, sql, &query)) @@ -624,14 +833,11 @@ ld_store_insert_internal_message(DatabaseCatalog *catalog, } char action[2] = { message->action, '\0' }; - char lsn[PG_LSN_MAXLENGTH] = { 0 }; - - sformat(lsn, sizeof(lsn), "%X/%X", LSN_FORMAT_ARGS(message->lsn)); /* bind our parameters now */ BindParam params[] = { { BIND_PARAMETER_TYPE_TEXT, "action", 0, action }, - { BIND_PARAMETER_TYPE_TEXT, "lsn", 0, lsn }, + { BIND_PARAMETER_TYPE_INT64, "lsn", message->lsn, NULL }, { timeParamType, "timestamp", 0, message->timeStr } }; @@ -640,6 +846,7 @@ ld_store_insert_internal_message(DatabaseCatalog *catalog, if (!catalog_sql_bind(&query, params, count)) { /* errors have already been logged */ + (void) semaphore_unlock(&(catalog->sema)); return false; } @@ -647,9 +854,12 @@ ld_store_insert_internal_message(DatabaseCatalog *catalog, if (!catalog_sql_execute_once(&query)) { /* errors have already been logged */ + (void) semaphore_unlock(&(catalog->sema)); return false; } + (void) semaphore_unlock(&(catalog->sema)); + return true; } @@ -670,19 +880,26 @@ ld_store_insert_replay_stmt(DatabaseCatalog *catalog, return false; } + if (!semaphore_lock(&(catalog->sema))) + { + /* errors have already been logged */ + return false; + } + /* compute the hash as a string, needed in both stmt and replay tables */ char hash[9] = { 0 }; sformat(hash, sizeof(hash), "%x", replayStmt->hash); if (replayStmt->stmt != NULL) { - char *sql = "insert into stmt (hash, sql) values($1, $2)"; + char *sql = "insert or ignore into stmt(hash, sql) values($1, $2)"; SQLiteQuery query = { 0 }; if (!catalog_sql_prepare(db, sql, &query)) { /* errors have already been logged */ + (void) semaphore_unlock(&(catalog->sema)); return false; } @@ -697,6 +914,7 @@ ld_store_insert_replay_stmt(DatabaseCatalog *catalog, if (!catalog_sql_bind(&query, params, count)) { /* errors have already been logged */ + (void) semaphore_unlock(&(catalog->sema)); return false; } @@ -704,37 +922,44 @@ ld_store_insert_replay_stmt(DatabaseCatalog *catalog, if 
(!catalog_sql_execute_once(&query)) { /* errors have already been logged */ + (void) semaphore_unlock(&(catalog->sema)); return false; } } char *sql = - "insert into replay(action, xid, lsn, timestamp, stmt_hash, stmt_args)" - "values($1, $2, $3, $4, $5, $6)"; + "insert into replay" + "(action, xid, lsn, endlsn, timestamp, stmt_hash, stmt_args)" + "values($1, $2, $3, $4, $5, $6, $7)"; SQLiteQuery query = { 0 }; if (!catalog_sql_prepare(db, sql, &query)) { /* errors have already been logged */ + (void) semaphore_unlock(&(catalog->sema)); return false; } char action[2] = { replayStmt->action, '\0' }; /* not all messages have an xid */ - BindParameterType xidParamType = replayStmt->xid > 0 ? - BIND_PARAMETER_TYPE_INT64 : BIND_PARAMETER_TYPE_NULL; + BindParameterType xidParamType = + replayStmt->xid > 0 ? + BIND_PARAMETER_TYPE_INT64 : + BIND_PARAMETER_TYPE_NULL; /* not all messages have an lsn */ - char lsn[PG_LSN_MAXLENGTH] = { 0 }; - BindParameterType lsnParamType = BIND_PARAMETER_TYPE_NULL; + BindParameterType lsnParamType = + replayStmt->lsn == InvalidXLogRecPtr ? + BIND_PARAMETER_TYPE_NULL : + BIND_PARAMETER_TYPE_INT64; - if (replayStmt->lsn != InvalidXLogRecPtr) - { - lsnParamType = BIND_PARAMETER_TYPE_TEXT; - sformat(lsn, sizeof(lsn), "%X/%X", LSN_FORMAT_ARGS(replayStmt->lsn)); - } + /* not all messages have an end lsn */ + BindParameterType endlsnParamType = + replayStmt->endlsn == InvalidXLogRecPtr ? + BIND_PARAMETER_TYPE_NULL : + BIND_PARAMETER_TYPE_INT64; /* not all messages have a time entry */ char *timestr = NULL; @@ -747,17 +972,22 @@ ld_store_insert_replay_stmt(DatabaseCatalog *catalog, } /* not all messages have a statement (hash, data) */ - BindParameterType hashParamType = replayStmt->hash > 0 ? - BIND_PARAMETER_TYPE_TEXT : BIND_PARAMETER_TYPE_NULL; + BindParameterType hashParamType = + replayStmt->hash > 0 ? + BIND_PARAMETER_TYPE_TEXT : + BIND_PARAMETER_TYPE_NULL; - BindParameterType dataParamType = replayStmt->data != NULL ? - BIND_PARAMETER_TYPE_TEXT : BIND_PARAMETER_TYPE_NULL; + BindParameterType dataParamType = + replayStmt->data != NULL ? 
+ BIND_PARAMETER_TYPE_TEXT : + BIND_PARAMETER_TYPE_NULL; /* bind our parameters now */ BindParam params[] = { { BIND_PARAMETER_TYPE_TEXT, "action", 0, action }, { xidParamType, "xid", replayStmt->xid, NULL }, - { lsnParamType, "lsn", 0, lsn }, + { lsnParamType, "lsn", replayStmt->lsn, NULL }, + { endlsnParamType, "endlsn", replayStmt->endlsn, NULL }, { timeParamType, "timestamp", 0, timestr }, { hashParamType, "stmt_hash", 0, hash }, { dataParamType, "stmt_args", 0, replayStmt->data }, @@ -768,6 +998,7 @@ ld_store_insert_replay_stmt(DatabaseCatalog *catalog, if (!catalog_sql_bind(&query, params, count)) { /* errors have already been logged */ + (void) semaphore_unlock(&(catalog->sema)); return false; } @@ -775,9 +1006,12 @@ ld_store_insert_replay_stmt(DatabaseCatalog *catalog, if (!catalog_sql_execute_once(&query)) { /* errors have already been logged */ + (void) semaphore_unlock(&(catalog->sema)); return false; } + (void) semaphore_unlock(&(catalog->sema)); + return true; } @@ -795,54 +1029,112 @@ ld_store_iter_output(StreamSpecs *specs, ReplayDBOutputIterFun *callback) iter->transform_lsn = specs->sentinel.transform_lsn; iter->endpos = specs->endpos; + DatabaseCatalog *catalog = iter->catalog; + + if (!semaphore_lock(&(catalog->sema))) + { + /* errors have already been logged */ + return false; + } + if (!ld_store_iter_output_init(iter)) { /* errors have already been logged */ + (void) semaphore_unlock(&(catalog->sema)); return false; } - for (;;) + if (iter->output == NULL || + iter->output->action == STREAM_ACTION_UNKNOWN) { - if (!ld_store_iter_output_next(iter)) + /* no rows returned from the init */ + log_warn("ld_store_iter_output: no rows"); + (void) semaphore_unlock(&(catalog->sema)); + return true; + } + + /* single message, call the callback function and finish */ + if (iter->output->action != STREAM_ACTION_BEGIN) + { + bool stop = false; + ReplayDBOutputMessage *output = iter->output; + + log_warn("ld_store_iter_output: %c %s", + output->action, + StreamActionToString(output->action)); + + /* now call the provided callback */ + if (!(*callback)(specs, output, &stop)) + { + log_error("Failed to iterate over CDC output messages, " + "see above for details"); + (void) semaphore_unlock(&(catalog->sema)); + return false; + } + + if (!ld_store_iter_output_finish(iter)) { /* errors have already been logged */ + (void) semaphore_unlock(&(catalog->sema)); return false; } - ReplayDBOutputMessage *output = iter->output; + (void) semaphore_unlock(&(catalog->sema)); - if (output == NULL) + return true; + } + else + { + /* iterate over a transaction */ + for (;;) { - if (!ld_store_iter_output_finish(iter)) + if (!ld_store_iter_output_next(iter)) { /* errors have already been logged */ + (void) semaphore_unlock(&(catalog->sema)); return false; } - break; - } + ReplayDBOutputMessage *output = iter->output; - bool stop = false; + if (output == NULL) + { + if (!ld_store_iter_output_finish(iter)) + { + /* errors have already been logged */ + (void) semaphore_unlock(&(catalog->sema)); + return false; + } + + break; + } - /* now call the provided callback */ - if (!(*callback)(specs, output, &stop)) - { - log_error("Failed to iterate over CDC output messages, " - "see above for details"); - return false; - } + bool stop = false; - if (stop) - { - if (!ld_store_iter_output_finish(iter)) + /* now call the provided callback */ + if (!(*callback)(specs, output, &stop)) { - /* errors have already been logged */ + log_error("Failed to iterate over CDC output messages, " + "see above for details"); + 
(void) semaphore_unlock(&(catalog->sema)); return false; } - return true; + + if (stop) + { + if (!ld_store_iter_output_finish(iter)) + { + /* errors have already been logged */ + (void) semaphore_unlock(&(catalog->sema)); + return false; + } + break; + } } } + (void) semaphore_unlock(&(catalog->sema)); + return true; } @@ -854,7 +1146,8 @@ ld_store_iter_output(StreamSpecs *specs, ReplayDBOutputIterFun *callback) bool ld_store_iter_output_init(ReplayDBOutputIterator *iter) { - sqlite3 *db = iter->catalog->db; + DatabaseCatalog *catalog = iter->catalog; + sqlite3 *db = catalog->db; if (db == NULL) { @@ -871,10 +1164,79 @@ ld_store_iter_output_init(ReplayDBOutputIterator *iter) return false; } + ReplayDBOutputMessage first = { 0 }; + ReplayDBOutputMessage last = { 0 }; + + /* + * Grab the output row for the given LSN, and then if it's a single message + * (action is SWITCH, ENDPOS, or KEEPALIVE) return it. If the message is a + * BEGIN message, lookup the associated COMMIT message's lsn (same xid) and + * then grab all the messages from that transaction. + */ + if (!ld_store_lookup_output_after_lsn(catalog, iter->transform_lsn, &first)) + { + /* errors have already been logged */ + iter->output = NULL; + return false; + } + + if (first.lsn == InvalidXLogRecPtr) + { + /* no rows available yet */ + iter->output = NULL; + return true; + } + + switch (first.action) + { + case STREAM_ACTION_SWITCH: + case STREAM_ACTION_KEEPALIVE: + case STREAM_ACTION_ENDPOS: + { + /* single message, just return it */ + log_warn("ld_store_iter_output_init: single message %c", first.action); + *(iter->output) = first; + return true; + } + + case STREAM_ACTION_BEGIN: + { + /* greab the COMMIT or ROLLBACK output entry if there is one */ + if (!ld_store_lookup_output_xid_end(catalog, first.xid, &last)) + { + /* errors have already been logged */ + iter->output = NULL; + return false; + } + + /* the COMMIT/ROLLBACK message is not available yet */ + if (last.lsn == InvalidXLogRecPtr) + { + iter->output = NULL; + return true; + } + + /* return the first message we iterate over */ + *(iter->output) = first; + + break; + } + + default: + { + log_error("Failed to start iterating over output at LSN %X/%X " + "with unexpected action %s", + LSN_FORMAT_ARGS(iter->transform_lsn), + StreamActionToString(first.action)); + iter->output = NULL; + return false; + } + } + char *sql = " select id, action, xid, lsn, timestamp, message " " from output " - " where lsn > $1 and lsn <= $2 " + " where xid = $1 " " order by id"; SQLiteQuery *query = &(iter->query); @@ -882,32 +1244,28 @@ ld_store_iter_output_init(ReplayDBOutputIterator *iter) query->context = iter->output; query->fetchFunction = &ld_store_output_fetch; + BindParam params[] = { + { BIND_PARAMETER_TYPE_INT64, "xid", first.xid, NULL } + }; + + int count = sizeof(params) / sizeof(params[0]); + if (!catalog_sql_prepare(db, sql, query)) { /* errors have already been logged */ return false; } - char tlsn[PG_LSN_MAXLENGTH] = { 0 }; - char elsn[PG_LSN_MAXLENGTH] = { 0 }; - - sformat(tlsn, sizeof(tlsn), "%X/%X", LSN_FORMAT_ARGS(iter->transform_lsn)); - sformat(elsn, sizeof(elsn), "%X/%X", LSN_FORMAT_ARGS(iter->endpos)); - - /* bind our parameters now */ - BindParam params[] = { - { BIND_PARAMETER_TYPE_TEXT, "transform_lsn", 0, tlsn }, - { BIND_PARAMETER_TYPE_TEXT, "endpos", 0, elsn } - }; - - int count = sizeof(params) / sizeof(params[0]); - + /* re-use params, hard code the count */ if (!catalog_sql_bind(query, params, count)) { /* errors have already been logged */ return false; } + 
log_warn("ld_store_iter_output_init: %s", sql); + log_warn("ld_store_iter_output_init: xid = %u", first.xid); + return true; } @@ -932,84 +1290,27 @@ ld_store_iter_output_next(ReplayDBOutputIterator *iter) if (rc != SQLITE_ROW) { log_error("Failed to step through statement: %s", query->sql); - log_error("[SQLite] %s", sqlite3_errmsg(query->db)); - return false; - } - - return ld_store_output_fetch(query); -} - - -/* - * ld_store_output_fetch fetches a ReplayDBOutputMessage entry from a SQLite - * ppStmt result set. - */ -bool -ld_store_output_fetch(SQLiteQuery *query) -{ - ReplayDBOutputMessage *output = (ReplayDBOutputMessage *) query->context; - - /* cleanup the memory area before re-use */ - bzero(output, sizeof(ReplayDBOutputMessage)); - - output->id = sqlite3_column_int64(query->ppStmt, 0); - - if (sqlite3_column_type(query->ppStmt, 1) != SQLITE_NULL) - { - char *action = (char *) sqlite3_column_text(query->ppStmt, 1); - output->action = action[0]; - } - else - { - log_error("Failed to fetch action for output id %lld", - (long long) output->id); - return false; - } - output->xid = sqlite3_column_int64(query->ppStmt, 2); - - /* lsn could be NULL */ - output->lsn = InvalidXLogRecPtr; + int offset = sqlite3_error_offset(query->db); - if (sqlite3_column_type(query->ppStmt, 3) != SQLITE_NULL) - { - char *lsn = (char *) sqlite3_column_text(query->ppStmt, 3); - - if (!parseLSN(lsn, &(output->lsn))) + if (offset != -1) { - log_error("Failed to parse LSN \"%s\"", lsn); - return false; + /* "Failed to step through statement: %s" is 34 chars of prefix */ + log_error("%34s%*s^", " ", offset, " "); } - } - /* timestamp */ - if (sqlite3_column_type(query->ppStmt, 4) != SQLITE_NULL) - { - strlcpy(output->timestamp, - (char *) sqlite3_column_text(query->ppStmt, 4), - sizeof(output->timestamp)); - } + int errcode = sqlite3_extended_errcode(query->db); - /* message */ - if (sqlite3_column_type(query->ppStmt, 5) != SQLITE_NULL) - { - int len = sqlite3_column_bytes(query->ppStmt, 5); - int bytes = len + 1; - - output->jsonBuffer = (char *) calloc(bytes, sizeof(char)); - - if (output->jsonBuffer == NULL) - { - log_fatal(ALLOCATION_FAILED_ERROR); - return false; - } + log_error("[SQLite] %s: %s", + sqlite3_errmsg(query->db), + sqlite3_errstr(errcode)); - strlcpy(output->jsonBuffer, - (char *) sqlite3_column_text(query->ppStmt, 5), - bytes); + return false; } - return true; + log_warn("ld_store_iter_output_next"); + + return ld_store_output_fetch(query); } @@ -1027,5 +1328,7 @@ ld_store_iter_output_finish(ReplayDBOutputIterator *iter) return false; } + iter->output = NULL; + return true; } diff --git a/src/bin/pgcopydb/ld_store.h b/src/bin/pgcopydb/ld_store.h index 9490b0976..b9b3696bc 100644 --- a/src/bin/pgcopydb/ld_store.h +++ b/src/bin/pgcopydb/ld_store.h @@ -38,6 +38,7 @@ typedef struct ReplayDBStmt StreamAction action; uint32_t xid; uint64_t lsn; + uint64_t endlsn; char timestamp[PG_MAX_TIMESTAMP]; @@ -50,12 +51,23 @@ typedef struct ReplayDBStmt bool ld_store_open_replaydb(StreamSpecs *specs); bool ld_store_set_current_cdc_filename(StreamSpecs *specs); -bool ld_store_set_first_cdc_filename(StreamSpecs *specs); bool ld_store_set_cdc_filename_at_lsn(StreamSpecs *specs, uint64_t lsn); bool ld_store_cdc_filename_fetch(SQLiteQuery *query); -bool ld_store_lookup_lsn(DatabaseCatalog *catalog, uint64_t lsn); +bool ld_store_lookup_output_at_lsn(DatabaseCatalog *catalog, + uint64_t lsn, + ReplayDBOutputMessage *output); + +bool ld_store_lookup_output_after_lsn(DatabaseCatalog *catalog, + uint64_t lsn, + 
ReplayDBOutputMessage *output); + +bool ld_store_lookup_output_xid_end(DatabaseCatalog *catalog, + uint32_t xid, + ReplayDBOutputMessage *output); + +bool ld_store_output_fetch(SQLiteQuery *query); bool ld_store_insert_cdc_filename(StreamSpecs *specs); @@ -91,7 +103,6 @@ typedef struct ReplayDBOutputIterator bool ld_store_iter_output_init(ReplayDBOutputIterator *iter); bool ld_store_iter_output_next(ReplayDBOutputIterator *iter); -bool ld_store_output_fetch(SQLiteQuery *query); bool ld_store_iter_output_finish(ReplayDBOutputIterator *iter); #endif /* LD_STORE_H */ diff --git a/src/bin/pgcopydb/ld_stream.c b/src/bin/pgcopydb/ld_stream.c index 069de160f..dc4e1f8d0 100644 --- a/src/bin/pgcopydb/ld_stream.c +++ b/src/bin/pgcopydb/ld_stream.c @@ -547,12 +547,13 @@ startLogicalStreaming(StreamSpecs *specs) /* ignore errors, try again unless asked to stop */ bool cleanExit = pgsql_stream_logical(&stream, &context); - if (cleanExit || asked_to_stop || asked_to_stop_fast || asked_to_quit) + if ((cleanExit && context.endpos != InvalidXLogRecPtr) || + asked_to_stop || asked_to_stop_fast || asked_to_quit) { retry = false; } - if (cleanExit) + if (cleanExit && context.endpos != InvalidXLogRecPtr) { log_info("Streamed up to write_lsn %X/%X, flush_lsn %X/%X, stopping: " "endpos is %X/%X", @@ -560,6 +561,13 @@ startLogicalStreaming(StreamSpecs *specs) LSN_FORMAT_ARGS(context.tracking->flushed_lsn), LSN_FORMAT_ARGS(context.endpos)); } + else if (cleanExit && context.endpos == InvalidXLogRecPtr) + { + log_info("Streamed up to write_lsn %X/%X, flush_lsn %X/%X, " + "reconnecting in 1s ", + LSN_FORMAT_ARGS(context.tracking->written_lsn), + LSN_FORMAT_ARGS(context.tracking->flushed_lsn)); + } else if (retries > 0 && context.tracking->written_lsn == waterMarkLSN) { @@ -1049,6 +1057,9 @@ streamCloseFile(LogicalStreamContext *context, bool time_to_abort) .lsn = context->cur_record_lsn }; + log_warn("streamCloseFile: insert ENDPOS at %X/%X", + LSN_FORMAT_ARGS(endpos.lsn)); + if (!ld_store_insert_internal_message(replayDB, &endpos)) { /* errors have already been logged */ @@ -1934,6 +1945,80 @@ StreamActionToString(StreamAction action) } +/* + * StreamActionIsTCL returns true if the action is part of the Transaction + * Control Language. + */ +bool +StreamActionIsTCL(StreamAction action) +{ + switch (action) + { + case STREAM_ACTION_BEGIN: + case STREAM_ACTION_COMMIT: + case STREAM_ACTION_ROLLBACK: + { + return true; + } + + default: + return false; + } + + /* keep compiler happy */ + return false; +} + + +/* + * StreamActionIsDML returns true if the action is a DML. + */ +bool +StreamActionIsDML(StreamAction action) +{ + switch (action) + { + case STREAM_ACTION_INSERT: + case STREAM_ACTION_UPDATE: + case STREAM_ACTION_DELETE: + case STREAM_ACTION_TRUNCATE: + { + return true; + } + + default: + return false; + } + + /* keep compiler happy */ + return false; +} + + +/* + * StreamActionIsInternal returns true if the action is internal. + */ +bool +StreamActionIsInternal(StreamAction action) +{ + switch (action) + { + case STREAM_ACTION_ENDPOS: + case STREAM_ACTION_KEEPALIVE: + case STREAM_ACTION_SWITCH: + { + return true; + } + + default: + return false; + } + + /* keep compiler happy */ + return false; +} + + /* * stream_setup_source_database sets up the source database with a sentinel * table, and the target database with a replication origin. 
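
The three classifiers added above (StreamActionIsTCL, StreamActionIsDML,
StreamActionIsInternal) make the transform dispatch explicit. A rough sketch of
how a caller could route one output row with them; route_output_action() and
the three handle_*() calls are illustrative names only, not functions from this
patch:

    /* Sketch only: route one decoded action using the new classifiers. */
    static bool
    route_output_action(StreamSpecs *specs, ReplayDBOutputMessage *output)
    {
        StreamAction action = output->action;

        if (StreamActionIsTCL(action))
        {
            /* BEGIN / COMMIT / ROLLBACK mark transaction boundaries */
            return handle_transaction_boundary(specs, output);
        }
        else if (StreamActionIsDML(action))
        {
            /* INSERT / UPDATE / DELETE / TRUNCATE become replay statements */
            return handle_dml(specs, output);
        }
        else if (StreamActionIsInternal(action))
        {
            /* SWITCH / KEEPALIVE / ENDPOS sync the sentinel transform_lsn */
            return handle_internal(specs, output);
        }

        log_error("Unknown stream action '%c'", action);

        return false;
    }
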
diff --git a/src/bin/pgcopydb/ld_stream.h b/src/bin/pgcopydb/ld_stream.h index 4e6584d9e..ec8599336 100644 --- a/src/bin/pgcopydb/ld_stream.h +++ b/src/bin/pgcopydb/ld_stream.h @@ -191,6 +191,9 @@ typedef struct LogicalMessageEndpos typedef struct LogicalTransactionStatement { StreamAction action; + uint32_t xid; + uint64_t lsn; + char timestamp[PG_MAX_TIMESTAMP]; union stmt { @@ -244,7 +247,11 @@ typedef struct LogicalTransactionArray typedef struct LogicalMessage { bool isTransaction; + StreamAction action; + uint32_t xid; + uint64_t lsn; + char timestamp[PG_MAX_TIMESTAMP]; union command { @@ -347,6 +354,8 @@ typedef struct StreamContext LogicalMessageMetadata previous; LogicalTransactionStatement *stmt; + uint64_t transform_lsn; /* current transform_lsn in-memory */ + uint64_t maxWrittenLSN; /* max LSN written so far to the JSON files */ uint64_t lastWriteTime; @@ -602,6 +611,9 @@ bool stream_fetch_current_lsn(uint64_t *lsn, StreamAction StreamActionFromChar(char action); char * StreamActionToString(StreamAction action); +bool StreamActionIsTCL(StreamAction action); +bool StreamActionIsDML(StreamAction action); +bool StreamActionIsInternal(StreamAction action); /* ld_transform.c */ diff --git a/src/bin/pgcopydb/ld_transform.c b/src/bin/pgcopydb/ld_transform.c index 99f0dbc51..086650c97 100644 --- a/src/bin/pgcopydb/ld_transform.c +++ b/src/bin/pgcopydb/ld_transform.c @@ -74,6 +74,8 @@ static bool stream_transform_cdc_file_hook(StreamSpecs *specs, ReplayDBOutputMessage *output, bool *stop); +static bool stream_transform_prepare_message(StreamSpecs *specs, + ReplayDBOutputMessage *output); static bool stream_write_insert(ReplayDBStmt *replayStmt, LogicalMessageInsert *insert); @@ -95,23 +97,34 @@ static bool stream_write_truncate(ReplayDBStmt *replayStmt, bool stream_transform_messages(StreamSpecs *specs) { + CopyDBSentinel *sentinel = &(specs->sentinel); StreamContext *privateContext = &(specs->private); - if (!stream_transform_context_init(specs)) + /* First, grab init values from the sentinel */ + if (!stream_transform_resume(specs)) { - /* errors have already been logged */ + log_error("Failed to resume transform from %X/%X, startpos %X/%X", + LSN_FORMAT_ARGS(privateContext->transform_lsn), + LSN_FORMAT_ARGS(privateContext->startpos)); return false; } - if (!stream_transform_resume(specs)) + /* + * Now prepare our context, including a pgsql connection that's needed for + * libpq's implementation of escaping identifiers and such. + */ + if (!stream_transform_context_init(specs)) { - log_error("Failed to resume streaming from %X/%X", - LSN_FORMAT_ARGS(privateContext->startpos)); + /* errors have already been logged */ return false; } + /* + * And loop over iterating our replayDB files one transaction at a time, + * switching over to the next file when necessary. 
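+	 *
+	 * The loop keeps going until the sentinel endpos (when it is set) has
+	 * been reached by transform_lsn, or until the process is asked to stop,
+	 * sleeping briefly between rounds so that the streaming process has time
+	 * to add more content.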
+ */ while (privateContext->endpos == InvalidXLogRecPtr || - specs->sentinel.transform_lsn <= privateContext->endpos) + sentinel->transform_lsn < privateContext->endpos) { if (asked_to_stop || asked_to_stop_fast || asked_to_quit) { @@ -119,12 +132,11 @@ stream_transform_messages(StreamSpecs *specs) return true; } - if (!ld_store_set_cdc_filename_at_lsn(specs, - specs->sentinel.transform_lsn)) + if (!ld_store_set_cdc_filename_at_lsn(specs, sentinel->transform_lsn)) { log_error("Failed to find CDC file at lsn %X/%X, " "see above for details", - LSN_FORMAT_ARGS(specs->sentinel.transform_lsn)); + LSN_FORMAT_ARGS(sentinel->transform_lsn)); return false; } @@ -137,12 +149,16 @@ stream_transform_messages(StreamSpecs *specs) } /* allow some time for the files and content to be created */ - pg_usleep(150 * 100); + pg_usleep(1500 * 1000); /* 1.5s */ } + /* + * This time use the sentinel transform_lsn, as a process restart will use + * that value, not the internal in-memory one. + */ log_info("Transform reached end position %X/%X at %X/%X", LSN_FORMAT_ARGS(privateContext->endpos), - LSN_FORMAT_ARGS(specs->sentinel.transform_lsn)); + LSN_FORMAT_ARGS(sentinel->transform_lsn)); return true; } @@ -195,15 +211,52 @@ stream_transform_context_init(StreamSpecs *specs) bool stream_transform_cdc_file(StreamSpecs *specs) { - log_notice("Transforming Logical Decoding messages from file \"%s\"", - specs->replayDB->dbfile); + CopyDBSentinel *sentinel = &(specs->sentinel); + StreamContext *privateContext = &(specs->private); + LogicalMessageMetadata *metadata = &(privateContext->metadata); - if (!ld_store_iter_output(specs, &stream_transform_cdc_file_hook)) + log_warn("Transforming Logical Decoding messages from file \"%s\" [%X/%X]", + specs->replayDB->dbfile, + LSN_FORMAT_ARGS(specs->sentinel.transform_lsn)); + + while (metadata->action != STREAM_ACTION_SWITCH) { - log_error("Failed to iterate over CDC file \"%s\", " - "see above for details", - specs->replayDB->dbfile); - return false; + if (asked_to_stop || asked_to_stop_fast || asked_to_quit) + { + log_debug("stream_transform_messages was asked to stop"); + return true; + } + + if (!ld_store_iter_output(specs, &stream_transform_cdc_file_hook)) + { + log_error("Failed to iterate over CDC file \"%s\", " + "see above for details", + specs->replayDB->dbfile); + return false; + } + + /* endpos might have been set now */ + if (!sentinel_get(specs->sourceDB, sentinel)) + { + /* errors have already been logged */ + return false; + } + + log_warn("stream_transform_cdc_file: endpos %X/%X", + LSN_FORMAT_ARGS(sentinel->endpos)); + + if (sentinel->endpos != InvalidXLogRecPtr && + sentinel->endpos <= sentinel->transform_lsn) + { + log_notice("Transform reached end position %X/%X at %X/%X", + LSN_FORMAT_ARGS(privateContext->endpos), + LSN_FORMAT_ARGS(sentinel->transform_lsn)); + + return true; + } + + /* allow some time for the files and content to be created */ + pg_usleep(1500 * 1000); /* 1.5s */ } return true; @@ -220,10 +273,9 @@ stream_transform_cdc_file_hook(StreamSpecs *specs, { CopyDBSentinel *sentinel = &(specs->sentinel); StreamContext *privateContext = &(specs->private); - LogicalMessageMetadata *metadata = &(privateContext->metadata); /* parse the logical decoding output */ - if (!stream_transform_message(privateContext, output->jsonBuffer)) + if (!stream_transform_prepare_message(specs, output)) { /* errors have already been logged */ return false; @@ -236,56 +288,125 @@ stream_transform_cdc_file_hook(StreamSpecs *specs, return false; } + DatabaseCatalog 
*sourceDB = specs->sourceDB; + + /* make internal note of the progress */ + uint64_t transform_lsn = privateContext->transform_lsn = output->lsn; + /* * At COMMIT, ROLLBACK, and KEEPALIVE, sync the sentinel transform_lsn. * At SWITCH, also sync transform_lsn so that we move on to the next file. */ - if (metadata->action == STREAM_ACTION_COMMIT || - metadata->action == STREAM_ACTION_ROLLBACK || - metadata->action == STREAM_ACTION_SWITCH || - metadata->action == STREAM_ACTION_KEEPALIVE) + switch (output->action) { - if (!sentinel_sync_transform(specs->sourceDB, metadata->lsn, sentinel)) + case STREAM_ACTION_COMMIT: + case STREAM_ACTION_ROLLBACK: + case STREAM_ACTION_SWITCH: + case STREAM_ACTION_KEEPALIVE: { - /* errors have already been logged */ - return false; - } + if (!sentinel_sync_transform(sourceDB, transform_lsn, sentinel)) + { + /* errors have already been logged */ + return false; + } - /* SWITCH is expected to be the last entry in the file */ - if (metadata->action == STREAM_ACTION_SWITCH) - { - *stop = true; - } - } - /* at ENDPOS check that it's the current sentinel value and exit */ - else if (metadata->action == STREAM_ACTION_ENDPOS) - { - if (!sentinel_sync_transform(specs->sourceDB, metadata->lsn, sentinel)) - { - /* errors have already been logged */ - return false; + /* SWITCH is expected to be the last entry in the file */ + if (output->action == STREAM_ACTION_SWITCH) + { + *stop = true; + } + + break; } - if (sentinel->endpos != InvalidXLogRecPtr && - sentinel->endpos <= metadata->lsn) + /* at ENDPOS check that it's the current sentinel value and exit */ + case STREAM_ACTION_ENDPOS: { - *stop = true; + if (!sentinel_sync_transform(sourceDB, transform_lsn, sentinel)) + { + /* errors have already been logged */ + return false; + } - log_info("Transform process reached ENDPOS %X/%X", - LSN_FORMAT_ARGS(metadata->lsn)); + if (sentinel->endpos != InvalidXLogRecPtr && + sentinel->endpos <= transform_lsn) + { + *stop = true; - return true; + log_info("Transform process reached ENDPOS %X/%X", + LSN_FORMAT_ARGS(output->lsn)); + + return true; + } + + break; + } + + /* nothing to do here for other actions */ + default: + { + /* noop */ + break; } } - if (privateContext->endpos != InvalidXLogRecPtr && - privateContext->endpos <= metadata->lsn) + /* we could reach the endpos on any message, not just ENDPOS */ + if (sentinel->endpos != InvalidXLogRecPtr && + sentinel->endpos <= transform_lsn) { *stop = true; log_info("Transform reached end position %X/%X at %X/%X", - LSN_FORMAT_ARGS(privateContext->endpos), - LSN_FORMAT_ARGS(metadata->lsn)); + LSN_FORMAT_ARGS(sentinel->endpos), + LSN_FORMAT_ARGS(transform_lsn)); + } + + return true; +} + + +/* + * stream_transform_prepare_message prepares a message with metadata taken from + * the replayDB output table, and parses the actual Logical Decoding message + * parts. 
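 *
 * A sketch of the metadata part, using the ReplayDBOutputMessage fields
 * visible in this patch (action, xid, lsn, timestamp, jsonBuffer):
 *
 *   metadata->action     = output->action;
 *   metadata->xid        = output->xid;
 *   metadata->lsn        = output->lsn;
 *   metadata->jsonBuffer = output->jsonBuffer;
 *
 *   strlcpy(metadata->timestamp, output->timestamp, PG_MAX_TIMESTAMP);
 *
 * Only then is the JSON buffer parsed (json_parse_string) and handed to
 * parseMessage() for the action-specific work, so the metadata no longer
 * needs to be extracted from the JSON document itself.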
+ */ +bool +stream_transform_prepare_message(StreamSpecs *specs, + ReplayDBOutputMessage *output) +{ + StreamContext *privateContext = &(specs->private); + LogicalMessageMetadata *metadata = &(privateContext->metadata); + + log_warn("stream_transform_prepare_message"); + + /* first re-build the metadata from the SQLite row */ + LogicalMessageMetadata outputMetadata = { + .action = output->action, + .xid = output->xid, + .lsn = output->lsn, + .jsonBuffer = output->jsonBuffer + }; + + *metadata = outputMetadata; + + /* copy the timestamp over */ + strlcpy(metadata->timestamp, output->timestamp, PG_MAX_TIMESTAMP); + + log_warn("stream_transform_prepare_message: %lld %c %lld %X/%X %s", + (long long) output->id, + metadata->action, + (long long) metadata->xid, + LSN_FORMAT_ARGS(metadata->lsn), + metadata->jsonBuffer); + + JSON_Value *json = json_parse_string(output->jsonBuffer); + + if (!parseMessage(privateContext, output->jsonBuffer, json)) + { + log_error("Failed to parse JSON message: %.1024s%s", + output->jsonBuffer, + strlen(output->jsonBuffer) > 1024 ? "..." : ""); + return false; } return true; @@ -300,7 +421,6 @@ bool stream_transform_write_transaction(StreamSpecs *specs) { StreamContext *privateContext = &(specs->private); - LogicalMessage *currentMsg = &(privateContext->currentMsg); LogicalMessageMetadata *metadata = &(privateContext->metadata); @@ -323,6 +443,8 @@ stream_transform_write_transaction(StreamSpecs *specs) LogicalMessage empty = { 0 }; *currentMsg = empty; + log_warn("stream_transform_write_transaction: currentMsg is empty"); + return true; } @@ -457,9 +579,50 @@ stream_transform_resume(StreamSpecs *specs) */ CopyDBSentinel *sentinel = &(specs->sentinel); - if (!sentinel_get(specs->sourceDB, sentinel)) + ConnectionRetryPolicy retryPolicy = { 0 }; + + int maxT = 300; /* 5m */ + int maxSleepTime = 1500; /* 1.5s */ + int baseSleepTime = 150; /* 150ms */ + + (void) pgsql_set_retry_policy(&retryPolicy, + maxT, + -1, /* unbounded number of attempts */ + maxSleepTime, + baseSleepTime); + + while (!pgsql_retry_policy_expired(&retryPolicy)) { - /* errors have already been logged */ + if (asked_to_stop || asked_to_stop_fast || asked_to_quit) + { + log_debug("stream_transform_messages was asked to stop"); + log_fatal("stream_transform_messages was asked to stop"); + return true; + } + + if (!sentinel_get(specs->sourceDB, sentinel)) + { + /* errors have already been logged */ + return false; + } + + if (sentinel->transform_lsn != InvalidXLogRecPtr) + { + break; + } + + int sleepTimeMs = + pgsql_compute_connection_retry_sleep_time(&retryPolicy); + + /* we have milliseconds, pg_usleep() wants microseconds */ + (void) pg_usleep(sleepTimeMs * 1000); + } + + if (sentinel->transform_lsn == InvalidXLogRecPtr) + { + log_error("Transform failed to grab sentinel values " + "(transform_lsn is %X/%X)", + LSN_FORMAT_ARGS(sentinel->transform_lsn)); return false; } @@ -1286,13 +1449,13 @@ parseMessage(StreamContext *privateContext, char *message, JSON_Value *json) return false; } - if (message == NULL) + if (message == NULL && StreamActionIsDML(metadata->action)) { log_error("BUG: parseMessage called with a NULL message"); return false; } - if (json == NULL) + if (json == NULL && StreamActionIsDML(metadata->action)) { log_error("BUG: parseMessage called with a NULL JSON_Value"); return false; @@ -1306,17 +1469,16 @@ parseMessage(StreamContext *privateContext, char *message, JSON_Value *json) } /* - * Check that XID make sense, except for SWITCH messages, which don't have - * XID information, only 
have LSN information. + * Check that XID make sense for DML actions (Insert, Update, Delete, + * Truncate). */ - if (metadata->action == STREAM_ACTION_INSERT || - metadata->action == STREAM_ACTION_UPDATE || - metadata->action == STREAM_ACTION_DELETE || - metadata->action == STREAM_ACTION_TRUNCATE) + if (StreamActionIsDML(metadata->action)) { if (mesg->isTransaction) { - if (txn->xid > 0 && metadata->xid > 0 && txn->xid != metadata->xid) + if (txn->xid > 0 && + metadata->xid > 0 && + txn->xid != metadata->xid) { log_debug("%s", message); log_error("BUG: logical message xid is %lld, which is different " @@ -1337,15 +1499,13 @@ parseMessage(StreamContext *privateContext, char *message, JSON_Value *json) } /* - * All messages except for BEGIN/COMMIT/ROLLBACL need a - * LogicalTransactionStatement to represent them within the - * current transaction. + * All messages except for BEGIN/COMMIT/ROLLBACK (Transaction Control + * Language, or TCL) need a LogicalTransactionStatement to represent them + * within the current transaction. */ LogicalTransactionStatement *stmt = NULL; - if (metadata->action != STREAM_ACTION_BEGIN && - metadata->action != STREAM_ACTION_COMMIT && - metadata->action != STREAM_ACTION_ROLLBACK) + if (!StreamActionIsTCL(metadata->action)) { stmt = (LogicalTransactionStatement *) calloc(1, sizeof(LogicalTransactionStatement)); @@ -1357,6 +1517,10 @@ parseMessage(StreamContext *privateContext, char *message, JSON_Value *json) } stmt->action = metadata->action; + stmt->xid = metadata->xid; + stmt->lsn = metadata->lsn; + + strlcpy(stmt->timestamp, metadata->timestamp, sizeof(stmt->timestamp)); /* publish the statement in the privateContext */ privateContext->stmt = stmt; @@ -1439,17 +1603,16 @@ parseMessage(StreamContext *privateContext, char *message, JSON_Value *json) /* switch wal messages are pgcopydb internal messages */ case STREAM_ACTION_SWITCH: { - stmt->stmt.switchwal.lsn = metadata->lsn; - if (mesg->isTransaction) { (void) streamLogicalTransactionAppendStatement(txn, stmt); } else { - /* copy the stmt over, then free the extra allocated memory */ + /* maintain the LogicalMessage copy of the metadata */ mesg->action = metadata->action; - mesg->command.switchwal = stmt->stmt.switchwal; + mesg->lsn = metadata->lsn; + strlcpy(mesg->timestamp, metadata->timestamp, PG_MAX_TIMESTAMP); } break; @@ -1458,21 +1621,16 @@ parseMessage(StreamContext *privateContext, char *message, JSON_Value *json) /* keepalive messages are pgcopydb internal messages */ case STREAM_ACTION_KEEPALIVE: { - stmt->stmt.keepalive.lsn = metadata->lsn; - - strlcpy(stmt->stmt.keepalive.timestamp, - metadata->timestamp, - sizeof(stmt->stmt.keepalive.timestamp)); - if (mesg->isTransaction) { (void) streamLogicalTransactionAppendStatement(txn, stmt); } else { - /* copy the stmt over, then free the extra allocated memory */ + /* maintain the LogicalMessage copy of the metadata */ mesg->action = metadata->action; - mesg->command.keepalive = stmt->stmt.keepalive; + mesg->lsn = metadata->lsn; + strlcpy(mesg->timestamp, metadata->timestamp, PG_MAX_TIMESTAMP); } break; @@ -1480,17 +1638,16 @@ parseMessage(StreamContext *privateContext, char *message, JSON_Value *json) case STREAM_ACTION_ENDPOS: { - stmt->stmt.endpos.lsn = metadata->lsn; - if (mesg->isTransaction) { (void) streamLogicalTransactionAppendStatement(txn, stmt); } else { - /* copy the stmt over, then free the extra allocated memory */ + /* maintain the LogicalMessage copy of the metadata */ mesg->action = metadata->action; - mesg->command.endpos = 
stmt->stmt.endpos; + mesg->lsn = metadata->lsn; + strlcpy(mesg->timestamp, metadata->timestamp, PG_MAX_TIMESTAMP); } break; @@ -1512,11 +1669,7 @@ parseMessage(StreamContext *privateContext, char *message, JSON_Value *json) * use the raw JSON message as a json object in the "message" * object key. */ - JSON_Value_Type jsmesgtype = - json_value_get_type( - json_object_get_value( - json_value_get_object(json), - "message")); + JSON_Value_Type jsmesgtype = json_value_get_type(json); switch (jsmesgtype) { @@ -1856,38 +2009,22 @@ stream_transform_write_replay_stmt(StreamSpecs *specs) } else { - ReplayDBStmt replayStmt = { .action = msg->action }; + ReplayDBStmt replayStmt = { + .action = msg->action, + .xid = msg->xid, + .lsn = msg->lsn + }; - switch (msg->action) - { - case STREAM_ACTION_SWITCH: - { - replayStmt.lsn = msg->command.switchwal.lsn; - break; - } - - case STREAM_ACTION_KEEPALIVE: - { - replayStmt.lsn = msg->command.keepalive.lsn; - - strlcpy(replayStmt.timestamp, - msg->command.keepalive.timestamp, - sizeof(replayStmt.timestamp)); - break; - } - - case STREAM_ACTION_ENDPOS: - { - replayStmt.lsn = msg->command.endpos.lsn; - break; - } + strlcpy(replayStmt.timestamp, msg->timestamp, PG_MAX_TIMESTAMP); - default: - { - log_error("BUG: Failed to write SQL for LogicalMessage action %d", - msg->action); - return false; - } + if (replayStmt.action != STREAM_ACTION_SWITCH && + replayStmt.action != STREAM_ACTION_KEEPALIVE && + replayStmt.action != STREAM_ACTION_ENDPOS) + { + log_error("BUG: Failed to write SQL for unexpected " + "LogicalMessage action %d", + replayStmt.action); + return false; } if (!ld_store_insert_replay_stmt(specs->replayDB, &replayStmt)) @@ -1897,7 +2034,7 @@ stream_transform_write_replay_stmt(StreamSpecs *specs) } } - return false; + return true; } @@ -1915,9 +2052,14 @@ stream_transform_write_replay_txn(StreamSpecs *specs) ReplayDBStmt begin = { .action = STREAM_ACTION_BEGIN, .xid = txn->xid, - .lsn = txn->beginLSN + .lsn = txn->beginLSN, + .endlsn = txn->commit ? 
txn->commitLSN : txn->rollbackLSN }; + log_warn("stream_transform_write_replay_txn: lsn %X/%X endlsn %X/%X", + LSN_FORMAT_ARGS(begin.lsn), + LSN_FORMAT_ARGS(begin.endlsn)); + strlcpy(begin.timestamp, txn->timestamp, sizeof(begin.timestamp)); if (!ld_store_insert_replay_stmt(specs->replayDB, &begin)) @@ -1930,7 +2072,13 @@ stream_transform_write_replay_txn(StreamSpecs *specs) for (; currentStmt != NULL; currentStmt = currentStmt->next) { - ReplayDBStmt stmt = { .action = currentStmt->action }; + ReplayDBStmt stmt = { + .action = currentStmt->action, + .xid = currentStmt->xid, + .lsn = currentStmt->lsn + }; + + strlcpy(stmt.timestamp, currentStmt->timestamp, PG_MAX_TIMESTAMP); switch (currentStmt->action) { @@ -1938,6 +2086,7 @@ stream_transform_write_replay_txn(StreamSpecs *specs) { if (!stream_write_insert(&stmt, &(currentStmt->stmt.insert))) { + /* errors have already been logged */ return false; } @@ -1948,6 +2097,7 @@ stream_transform_write_replay_txn(StreamSpecs *specs) { if (!stream_write_update(&stmt, &(currentStmt->stmt.update))) { + /* errors have already been logged */ return false; } break; @@ -1957,6 +2107,7 @@ stream_transform_write_replay_txn(StreamSpecs *specs) { if (!stream_write_delete(&stmt, &(currentStmt->stmt.delete))) { + /* errors have already been logged */ return false; } break; @@ -1966,6 +2117,7 @@ stream_transform_write_replay_txn(StreamSpecs *specs) { if (!stream_write_truncate(&stmt, &(currentStmt->stmt.truncate))) { + /* errors have already been logged */ return false; } break; @@ -1973,7 +2125,7 @@ stream_transform_write_replay_txn(StreamSpecs *specs) default: { - log_error("BUG: Failed to write SQL action %d", + log_error("BUG: Failed to write unexepected SQL action %d", currentStmt->action); return false; } diff --git a/src/bin/pgcopydb/ld_wal2json.c b/src/bin/pgcopydb/ld_wal2json.c index dac7eb886..f3176b607 100644 --- a/src/bin/pgcopydb/ld_wal2json.c +++ b/src/bin/pgcopydb/ld_wal2json.c @@ -155,8 +155,7 @@ parseWal2jsonMessage(StreamContext *privateContext, case STREAM_ACTION_INSERT: { - JSON_Array *jscols = - json_object_dotget_array(jsobj, "message.columns"); + JSON_Array *jscols = json_object_get_array(jsobj, "columns"); stmt->stmt.insert.table = table; @@ -204,8 +203,7 @@ parseWal2jsonMessage(StreamContext *privateContext, } LogicalMessageTuple *old = &(stmt->stmt.update.old.array[0]); - JSON_Array *jsids = - json_object_dotget_array(jsobj, "message.identity"); + JSON_Array *jsids = json_object_get_array(jsobj, "identity"); if (!SetColumnNamesAndValues(old, message, jsids, pgsql)) { @@ -216,8 +214,7 @@ parseWal2jsonMessage(StreamContext *privateContext, } LogicalMessageTuple *new = &(stmt->stmt.update.new.array[0]); - JSON_Array *jscols = - json_object_dotget_array(jsobj, "message.columns"); + JSON_Array *jscols = json_object_get_array(jsobj, "columns"); if (!SetColumnNamesAndValues(new, message, jscols, pgsql)) { @@ -245,8 +242,7 @@ parseWal2jsonMessage(StreamContext *privateContext, } LogicalMessageTuple *old = &(stmt->stmt.update.old.array[0]); - JSON_Array *jsids = - json_object_dotget_array(jsobj, "message.identity"); + JSON_Array *jsids = json_object_get_array(jsobj, "identity"); if (!SetColumnNamesAndValues(old, message, jsids, pgsql)) { @@ -283,8 +279,8 @@ SetMessageRelation(JSON_Object *jsobj, char *schema = NULL; char *relname = NULL; - schema = (char *) json_object_dotget_string(jsobj, "message.schema"); - relname = (char *) json_object_dotget_string(jsobj, "message.table"); + schema = (char *) json_object_dotget_string(jsobj, "schema"); + 
relname = (char *) json_object_dotget_string(jsobj, "table"); if (schema == NULL || relname == NULL) diff --git a/src/bin/pgcopydb/sentinel.c b/src/bin/pgcopydb/sentinel.c index 6dd92be39..9a0c7c9ac 100644 --- a/src/bin/pgcopydb/sentinel.c +++ b/src/bin/pgcopydb/sentinel.c @@ -34,7 +34,7 @@ sentinel_setup(DatabaseCatalog *catalog, uint64_t startpos, uint64_t endpos) "insert or replace into sentinel(" " id, startpos, endpos, apply, " " write_lsn, transform_lsn, flush_lsn, replay_lsn) " - "values($1, $2, $3, $4, '0/0', '0/0', '0/0', '0/0')"; + "values($1, $2, $3, $4, '0/0', $5, '0/0', '0/0')"; if (!semaphore_lock(&(catalog->sema))) { @@ -62,7 +62,8 @@ sentinel_setup(DatabaseCatalog *catalog, uint64_t startpos, uint64_t endpos) { BIND_PARAMETER_TYPE_INT64, "id", 1, NULL }, { BIND_PARAMETER_TYPE_TEXT, "startpos", 0, (char *) startLSN }, { BIND_PARAMETER_TYPE_TEXT, "endpos", 0, (char *) endLSN }, - { BIND_PARAMETER_TYPE_INT, "apply", 0, NULL } + { BIND_PARAMETER_TYPE_INT, "apply", 0, NULL }, + { BIND_PARAMETER_TYPE_TEXT, "transform_lsn", 0, (char *) startLSN } }; int count = sizeof(params) / sizeof(params[0]); diff --git a/tests/follow-wal2json/copydb.sh b/tests/follow-wal2json/copydb.sh index 5b8d11748..3ac444da0 100755 --- a/tests/follow-wal2json/copydb.sh +++ b/tests/follow-wal2json/copydb.sh @@ -1,7 +1,7 @@ #! /bin/bash set -x -set -e +#set -e # This script expects the following environment variables to be set: # @@ -20,7 +20,17 @@ psql -o /tmp/d.out -d ${PGCOPYDB_SOURCE_PGURI} -1 -f /usr/src/pagila/pagila-data psql -d ${PGCOPYDB_SOURCE_PGURI} -f /usr/src/pgcopydb/ddl.sql # pgcopydb clone uses the environment variables -pgcopydb clone --follow --plugin wal2json +pgcopydb clone --follow --plugin wal2json --debug + +db="/var/lib/postgres/.local/share/pgcopydb/00000001-*.db" + +sqlite3 ${db} < Date: Thu, 18 Jul 2024 11:28:27 +0200 Subject: [PATCH 6/8] Some cleanup. 
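
The error reporting change in catalog.c below relies on sqlite3_error_offset(),
which recent SQLite releases provide to report the byte offset of the SQL token
associated with the most recent error, or -1 when no such position is known. A
minimal sketch of the pattern, with simplified placeholder names (db, stmt, and
sql stand in for the SQLiteQuery fields used in the real code), assuming the
project's log_error() helper:

    int rc = sqlite3_step(stmt);

    if (rc != SQLITE_DONE)
    {
        log_error("Failed to execute statement: %s", sql);

        /* point a caret at the offending offset, when SQLite knows it */
        int offset = sqlite3_error_offset(db);

        if (offset != -1)
        {
            log_error("%34s%*s^", " ", offset, " ");
        }

        log_error("[SQLite %d: %s]: %s",
                  rc, sqlite3_errstr(rc), sqlite3_errmsg(db));
    }

The rest of the diff lowers a number of log_notice() calls to log_debug() in
dump_restore.c and guards the fsync() call in streamFlush() so that it is only
attempted when a JSON file is actually open.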
--- src/bin/pgcopydb/catalog.c | 28 ++++++++++++++++++-- src/bin/pgcopydb/dump_restore.c | 46 ++++++++++++++++----------------- src/bin/pgcopydb/ld_stream.c | 44 +++++++++++++++++-------------- tests/follow-wal2json/copydb.sh | 2 +- 4 files changed, 75 insertions(+), 45 deletions(-) diff --git a/src/bin/pgcopydb/catalog.c b/src/bin/pgcopydb/catalog.c index 543405773..ec27a5920 100644 --- a/src/bin/pgcopydb/catalog.c +++ b/src/bin/pgcopydb/catalog.c @@ -8148,7 +8148,19 @@ catalog_sql_execute(SQLiteQuery *query) if (rc != SQLITE_DONE) { log_error("Failed to execute statement: %s", query->sql); - log_error("[SQLite %d] %s", rc, sqlite3_errstr(rc)); + + int offset = sqlite3_error_offset(query->db); + + if (offset != -1) + { + /* "Failed to step through statement: %s" is 34 chars of prefix */ + log_error("%34s%*s^", " ", offset, " "); + } + + log_error("[SQLite %d: %s]: %s", + rc, + sqlite3_errstr(rc), + sqlite3_errmsg(query->db)); (void) sqlite3_clear_bindings(query->ppStmt); (void) sqlite3_finalize(query->ppStmt); @@ -8207,7 +8219,19 @@ catalog_sql_execute(SQLiteQuery *query) if (catalog_sql_step(query) != SQLITE_DONE) { log_error("Failed to execute statement: %s", query->sql); - log_error("[SQLite %d] %s", rc, sqlite3_errstr(rc)); + + int offset = sqlite3_error_offset(query->db); + + if (offset != -1) + { + /* "Failed to step through statement: %s" is 34 chars of prefix */ + log_error("%34s%*s^", " ", offset, " "); + } + + log_error("[SQLite %d: %s]: %s", + rc, + sqlite3_errstr(rc), + sqlite3_errmsg(query->db)); (void) sqlite3_clear_bindings(query->ppStmt); (void) sqlite3_finalize(query->ppStmt); diff --git a/src/bin/pgcopydb/dump_restore.c b/src/bin/pgcopydb/dump_restore.c index 4a993fd3e..818c311ad 100644 --- a/src/bin/pgcopydb/dump_restore.c +++ b/src/bin/pgcopydb/dump_restore.c @@ -715,7 +715,7 @@ copydb_write_restore_list_hook(void *ctx, ArchiveContentItem *item) if (item->desc == ARCHIVE_TAG_DATABASE) { skip = true; - log_notice("Skipping DATABASE \"%s\"", name); + log_debug("Skipping DATABASE \"%s\"", name); } /* @@ -729,7 +729,7 @@ copydb_write_restore_list_hook(void *ctx, ArchiveContentItem *item) item->tagType == ARCHIVE_TAG_TYPE_EXTENSION) { skip = true; - log_notice("Skipping COMMENT ON EXTENSION \"%s\"", name); + log_debug("Skipping COMMENT ON EXTENSION \"%s\"", name); } if (!skip && catOid == PG_NAMESPACE_OID) @@ -748,11 +748,11 @@ copydb_write_restore_list_hook(void *ctx, ArchiveContentItem *item) { skip = true; - log_notice("Skipping already existing dumpId %d: %s %u %s", - item->dumpId, - item->description, - item->objectOid, - item->restoreListName); + log_debug("Skipping already existing dumpId %d: %s %u %s", + item->dumpId, + item->description, + item->objectOid, + item->restoreListName); } } @@ -760,11 +760,11 @@ copydb_write_restore_list_hook(void *ctx, ArchiveContentItem *item) { skip = true; - log_notice("Skipping already processed dumpId %d: %s %u %s", - item->dumpId, - item->description, - item->objectOid, - item->restoreListName); + log_debug("Skipping already processed dumpId %d: %s %u %s", + item->dumpId, + item->description, + item->objectOid, + item->restoreListName); } /* @@ -793,23 +793,23 @@ copydb_write_restore_list_hook(void *ctx, ArchiveContentItem *item) { skip = true; - log_notice("Skipping materialized view refresh dumpId %d: %s %u %s", - item->dumpId, - item->description, - item->objectOid, - item->restoreListName); + log_debug("Skipping materialized view refresh dumpId %d: %s %u %s", + item->dumpId, + item->description, + item->objectOid, + 
item->restoreListName); } if (!skip && copydb_objectid_is_filtered_out(specs, oid, name)) { skip = true; - log_notice("Skipping filtered-out dumpId %d: %s %u %u %s", - item->dumpId, - item->description, - item->catalogOid, - item->objectOid, - item->restoreListName); + log_debug("Skipping filtered-out dumpId %d: %s %u %u %s", + item->dumpId, + item->description, + item->catalogOid, + item->objectOid, + item->restoreListName); } PQExpBuffer buf = createPQExpBuffer(); diff --git a/src/bin/pgcopydb/ld_stream.c b/src/bin/pgcopydb/ld_stream.c index dc4e1f8d0..8481475dc 100644 --- a/src/bin/pgcopydb/ld_stream.c +++ b/src/bin/pgcopydb/ld_stream.c @@ -1048,7 +1048,8 @@ streamCloseFile(LogicalStreamContext *context, bool time_to_abort) * partial transaction. */ if (time_to_abort && - privateContext->jsonFile != NULL && + + /* privateContext->jsonFile != NULL && */ privateContext->endpos != InvalidXLogRecPtr && privateContext->endpos <= context->cur_record_lsn) { @@ -1208,9 +1209,9 @@ streamFlush(LogicalStreamContext *context) { StreamContext *privateContext = (StreamContext *) context->private; - log_debug("streamFlush: %X/%X %X/%X", - LSN_FORMAT_ARGS(context->tracking->written_lsn), - LSN_FORMAT_ARGS(context->cur_record_lsn)); + log_warn("streamFlush: %X/%X %X/%X", + LSN_FORMAT_ARGS(context->tracking->written_lsn), + LSN_FORMAT_ARGS(context->cur_record_lsn)); /* if needed, flush our current file now (fsync) */ if (context->tracking->flushed_lsn < context->tracking->written_lsn) @@ -1230,14 +1231,19 @@ streamFlush(LogicalStreamContext *context) /* * streamKeepalive ensures we have a valid jsonFile by calling * streamRotateFile, so we can safely call fsync here. + * + * TODO: remove that code. */ - int fd = fileno(privateContext->jsonFile); - - if (fsync(fd) != 0) + if (privateContext->jsonFile != NULL) { - log_error("Failed to fsync file \"%s\": %m", - privateContext->partialFileName); - return false; + int fd = fileno(privateContext->jsonFile); + + if (fsync(fd) != 0) + { + log_error("Failed to fsync file \"%s\": %m", + privateContext->partialFileName); + return false; + } } context->tracking->flushed_lsn = context->tracking->written_lsn; @@ -1411,15 +1417,15 @@ stream_sync_sentinel(LogicalStreamContext *context) context->endpos = sentinel.endpos; context->tracking->applied_lsn = sentinel.replay_lsn; - log_debug("stream_sync_sentinel: " - "write_lsn %X/%X flush_lsn %X/%X apply_lsn %X/%X " - "startpos %X/%X endpos %X/%X apply %s", - LSN_FORMAT_ARGS(context->tracking->written_lsn), - LSN_FORMAT_ARGS(context->tracking->flushed_lsn), - LSN_FORMAT_ARGS(context->tracking->applied_lsn), - LSN_FORMAT_ARGS(privateContext->startpos), - LSN_FORMAT_ARGS(privateContext->endpos), - privateContext->apply ? "enabled" : "disabled"); + log_warn("stream_sync_sentinel: " + "write_lsn %X/%X flush_lsn %X/%X apply_lsn %X/%X " + "startpos %X/%X endpos %X/%X apply %s", + LSN_FORMAT_ARGS(context->tracking->written_lsn), + LSN_FORMAT_ARGS(context->tracking->flushed_lsn), + LSN_FORMAT_ARGS(context->tracking->applied_lsn), + LSN_FORMAT_ARGS(privateContext->startpos), + LSN_FORMAT_ARGS(privateContext->endpos), + privateContext->apply ? 
"enabled" : "disabled"); return true; } diff --git a/tests/follow-wal2json/copydb.sh b/tests/follow-wal2json/copydb.sh index 3ac444da0..60a1f36dc 100755 --- a/tests/follow-wal2json/copydb.sh +++ b/tests/follow-wal2json/copydb.sh @@ -20,7 +20,7 @@ psql -o /tmp/d.out -d ${PGCOPYDB_SOURCE_PGURI} -1 -f /usr/src/pagila/pagila-data psql -d ${PGCOPYDB_SOURCE_PGURI} -f /usr/src/pgcopydb/ddl.sql # pgcopydb clone uses the environment variables -pgcopydb clone --follow --plugin wal2json --debug +pgcopydb clone --follow --plugin wal2json --notice db="/var/lib/postgres/.local/share/pgcopydb/00000001-*.db" From 5e3dfe25633b76875abc25c5537e1846e63c1f73 Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Thu, 18 Jul 2024 11:08:43 +0200 Subject: [PATCH 7/8] More WIP. --- src/bin/pgcopydb/catalog.c | 4 +++- src/bin/pgcopydb/ld_stream.c | 6 ++++++ src/bin/pgcopydb/ld_transform.c | 16 ++++++++++------ src/bin/pgcopydb/sentinel.c | 4 +++- 4 files changed, 22 insertions(+), 8 deletions(-) diff --git a/src/bin/pgcopydb/catalog.c b/src/bin/pgcopydb/catalog.c index ec27a5920..861392259 100644 --- a/src/bin/pgcopydb/catalog.c +++ b/src/bin/pgcopydb/catalog.c @@ -839,6 +839,8 @@ catalog_init(DatabaseCatalog *catalog) if (sqlite3_open(catalog->dbfile, &(catalog->db)) != SQLITE_OK) { + /* ensure a db is NULL unless it's opened */ + catalog->db = NULL; log_error("Failed to open \"%s\": %s", catalog->dbfile, sqlite3_errmsg(catalog->db)); @@ -8293,7 +8295,7 @@ catalog_sql_step(SQLiteQuery *query) int sleepTimeMs = pgsql_compute_connection_retry_sleep_time(&retryPolicy); - log_sqlite("[SQLite %d]: %s, try again in %dms", + log_notice("[SQLite %d]: %s, try again in %dms", rc, sqlite3_errmsg(query->db), sleepTimeMs); diff --git a/src/bin/pgcopydb/ld_stream.c b/src/bin/pgcopydb/ld_stream.c index 8481475dc..ff1acc04a 100644 --- a/src/bin/pgcopydb/ld_stream.c +++ b/src/bin/pgcopydb/ld_stream.c @@ -1414,6 +1414,12 @@ stream_sync_sentinel(LogicalStreamContext *context) privateContext->endpos = sentinel.endpos; privateContext->startpos = sentinel.startpos; + if (context->endpos != sentinel.endpos) + { + log_warn("stream_sync_sentinel: updating endpos to %X/%X", + LSN_FORMAT_ARGS(sentinel.endpos)); + } + context->endpos = sentinel.endpos; context->tracking->applied_lsn = sentinel.replay_lsn; diff --git a/src/bin/pgcopydb/ld_transform.c b/src/bin/pgcopydb/ld_transform.c index 086650c97..9d696f6d7 100644 --- a/src/bin/pgcopydb/ld_transform.c +++ b/src/bin/pgcopydb/ld_transform.c @@ -140,12 +140,16 @@ stream_transform_messages(StreamSpecs *specs) return false; } - if (!stream_transform_cdc_file(specs)) + /* race conditions: we could have zero file registered yet */ + if (specs->replayDB->db != NULL) { - log_error("Failed to transform CDC messages from file \"%s\", " - "see above for details", - specs->replayDB->dbfile); - return false; + if (!stream_transform_cdc_file(specs)) + { + log_error("Failed to transform CDC messages from file \"%s\", " + "see above for details", + specs->replayDB->dbfile); + return false; + } } /* allow some time for the files and content to be created */ @@ -256,7 +260,7 @@ stream_transform_cdc_file(StreamSpecs *specs) } /* allow some time for the files and content to be created */ - pg_usleep(1500 * 1000); /* 1.5s */ + pg_usleep(50 * 1000); /* 50ms */ } return true; diff --git a/src/bin/pgcopydb/sentinel.c b/src/bin/pgcopydb/sentinel.c index 9a0c7c9ac..ed3e6750b 100644 --- a/src/bin/pgcopydb/sentinel.c +++ b/src/bin/pgcopydb/sentinel.c @@ -285,10 +285,12 @@ sentinel_update_write_flush_lsn(DatabaseCatalog 
*catalog, if (db == NULL) { - log_error("BUG: sentinel_update_endpos: db is NULL"); + log_error("BUG: sentinel_update_write_flush_lsn: db is NULL"); return false; } + log_warn("sentinel_update_write_flush_lsn: \"%s\"", catalog->dbfile); + char *sql = "update sentinel set startpos = $1, write_lsn = $2, flush_lsn = $3 " "where id = 1"; From 678f35657786642e912cd699409bcd65e3b013a6 Mon Sep 17 00:00:00 2001 From: Dimitri Fontaine Date: Mon, 14 Oct 2024 17:57:55 +0200 Subject: [PATCH 8/8] test signed commit --- src/bin/pgcopydb/ld_transform.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bin/pgcopydb/ld_transform.c b/src/bin/pgcopydb/ld_transform.c index 9d696f6d7..681b2b0a4 100644 --- a/src/bin/pgcopydb/ld_transform.c +++ b/src/bin/pgcopydb/ld_transform.c @@ -2398,7 +2398,7 @@ stream_write_update(ReplayDBStmt *replayStmt, LogicalMessageUpdate *update) return false; } } - + if (first) { first = false;