author    Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-04 12:15:05 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org>  2024-05-04 12:15:05 +0000
commit    46651ce6fe013220ed397add242004d764fc0153 (patch)
tree      6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/bin/pg_rewind/libpq_source.c
parent    Initial commit. (diff)
Adding upstream version 14.5. (upstream/14.5, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/bin/pg_rewind/libpq_source.c')
-rw-r--r--  src/bin/pg_rewind/libpq_source.c | 643
1 file changed, 643 insertions(+), 0 deletions(-)
diff --git a/src/bin/pg_rewind/libpq_source.c b/src/bin/pg_rewind/libpq_source.c
new file mode 100644
index 0000000..8e0783f
--- /dev/null
+++ b/src/bin/pg_rewind/libpq_source.c
@@ -0,0 +1,643 @@
+/*-------------------------------------------------------------------------
+ *
+ * libpq_source.c
+ * Functions for fetching files from a remote server via libpq.
+ *
+ * Copyright (c) 2013-2021, PostgreSQL Global Development Group
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres_fe.h"
+
+#include "catalog/pg_type_d.h"
+#include "common/connect.h"
+#include "datapagemap.h"
+#include "file_ops.h"
+#include "filemap.h"
+#include "lib/stringinfo.h"
+#include "pg_rewind.h"
+#include "port/pg_bswap.h"
+#include "rewind_source.h"
+
+/*
+ * Files are fetched MAX_CHUNK_SIZE bytes at a time, and with a
+ * maximum of MAX_CHUNKS_PER_QUERY chunks in a single query.
+ */
+#define MAX_CHUNK_SIZE (1024 * 1024)
+#define MAX_CHUNKS_PER_QUERY 1000
+
+/* represents a request to fetch a piece of a file from the source */
+typedef struct
+{
+ const char *path; /* path relative to data directory root */
+ off_t offset;
+ size_t length;
+} fetch_range_request;
+
+typedef struct
+{
+ rewind_source common; /* common interface functions */
+
+ PGconn *conn;
+
+ /*
+ * Queue of chunks that have been requested with the queue_fetch_range()
+ * function, but have not been fetched from the remote server yet.
+ */
+ int num_requests;
+ fetch_range_request request_queue[MAX_CHUNKS_PER_QUERY];
+
+ /* temporary space for process_queued_fetch_requests() */
+ StringInfoData paths;
+ StringInfoData offsets;
+ StringInfoData lengths;
+} libpq_source;
+
+static void init_libpq_conn(PGconn *conn);
+static char *run_simple_query(PGconn *conn, const char *sql);
+static void run_simple_command(PGconn *conn, const char *sql);
+static void appendArrayEscapedString(StringInfo buf, const char *str);
+
+static void process_queued_fetch_requests(libpq_source *src);
+
+/* public interface functions */
+static void libpq_traverse_files(rewind_source *source,
+ process_file_callback_t callback);
+static void libpq_queue_fetch_range(rewind_source *source, const char *path,
+ off_t off, size_t len);
+static void libpq_finish_fetch(rewind_source *source);
+static char *libpq_fetch_file(rewind_source *source, const char *path,
+ size_t *filesize);
+static XLogRecPtr libpq_get_current_wal_insert_lsn(rewind_source *source);
+static void libpq_destroy(rewind_source *source);
+
+/*
+ * Create a new libpq source.
+ *
+ * The caller has already established the connection, but should not try
+ * to use it while the source is active.
+ */
+rewind_source *
+init_libpq_source(PGconn *conn)
+{
+ libpq_source *src;
+
+ init_libpq_conn(conn);
+
+ src = pg_malloc0(sizeof(libpq_source));
+
+ src->common.traverse_files = libpq_traverse_files;
+ src->common.fetch_file = libpq_fetch_file;
+ src->common.queue_fetch_range = libpq_queue_fetch_range;
+ src->common.finish_fetch = libpq_finish_fetch;
+ src->common.get_current_wal_insert_lsn = libpq_get_current_wal_insert_lsn;
+ src->common.destroy = libpq_destroy;
+
+ src->conn = conn;
+
+ initStringInfo(&src->paths);
+ initStringInfo(&src->offsets);
+ initStringInfo(&src->lengths);
+
+ return &src->common;
+}
+
+/*
+ * Initialize a libpq connection for use.
+ */
+static void
+init_libpq_conn(PGconn *conn)
+{
+ PGresult *res;
+ char *str;
+
+ /* disable all types of timeouts */
+ run_simple_command(conn, "SET statement_timeout = 0");
+ run_simple_command(conn, "SET lock_timeout = 0");
+ run_simple_command(conn, "SET idle_in_transaction_session_timeout = 0");
+
+ /*
+ * We don't intend to do any updates, so put the connection in read-only
+ * mode to keep us honest.
+ */
+ run_simple_command(conn, "SET default_transaction_read_only = on");
+
+ /* secure search_path */
+ res = PQexec(conn, ALWAYS_SECURE_SEARCH_PATH_SQL);
+ if (PQresultStatus(res) != PGRES_TUPLES_OK)
+ pg_fatal("could not clear search_path: %s",
+ PQresultErrorMessage(res));
+ PQclear(res);
+
+ /*
+ * Also check that full_page_writes is enabled. We can get torn pages if
+ * a page is modified while we read it with pg_read_binary_file(), and we
+ * rely on full page images to fix them.
+ */
+ str = run_simple_query(conn, "SHOW full_page_writes");
+ if (strcmp(str, "on") != 0)
+ pg_fatal("full_page_writes must be enabled in the source server");
+ pg_free(str);
+
+ /* Prepare a statement we'll use to fetch files */
+ res = PQprepare(conn, "fetch_chunks_stmt",
+ "SELECT path, begin,\n"
+ " pg_read_binary_file(path, begin, len, true) AS chunk\n"
+ "FROM unnest ($1::text[], $2::int8[], $3::int4[]) as x(path, begin, len)",
+ 3, NULL);
+
+ if (PQresultStatus(res) != PGRES_COMMAND_OK)
+ pg_fatal("could not prepare statement to fetch file contents: %s",
+ PQresultErrorMessage(res));
+ PQclear(res);
+}
+
+/*
+ * Run a query that returns a single value.
+ *
+ * The result should be pg_free'd after use.
+ */
+static char *
+run_simple_query(PGconn *conn, const char *sql)
+{
+ PGresult *res;
+ char *result;
+
+ res = PQexec(conn, sql);
+
+ if (PQresultStatus(res) != PGRES_TUPLES_OK)
+ pg_fatal("error running query (%s) on source server: %s",
+ sql, PQresultErrorMessage(res));
+
+ /* sanity check the result set */
+ if (PQnfields(res) != 1 || PQntuples(res) != 1 || PQgetisnull(res, 0, 0))
+ pg_fatal("unexpected result set from query");
+
+ result = pg_strdup(PQgetvalue(res, 0, 0));
+
+ PQclear(res);
+
+ return result;
+}
+
+/*
+ * Run a command.
+ *
+ * In the event of a failure, exit immediately.
+ */
+static void
+run_simple_command(PGconn *conn, const char *sql)
+{
+ PGresult *res;
+
+ res = PQexec(conn, sql);
+
+ if (PQresultStatus(res) != PGRES_COMMAND_OK)
+ pg_fatal("error running query (%s) in source server: %s",
+ sql, PQresultErrorMessage(res));
+
+ PQclear(res);
+}
+
+/*
+ * Call the pg_current_wal_insert_lsn() function in the remote system.
+ */
+static XLogRecPtr
+libpq_get_current_wal_insert_lsn(rewind_source *source)
+{
+ PGconn *conn = ((libpq_source *) source)->conn;
+ XLogRecPtr result;
+ uint32 hi;
+ uint32 lo;
+ char *val;
+
+ val = run_simple_query(conn, "SELECT pg_current_wal_insert_lsn()");
+
+ if (sscanf(val, "%X/%X", &hi, &lo) != 2)
+ pg_fatal("unrecognized result \"%s\" for current WAL insert location", val);
+
+ result = ((uint64) hi) << 32 | lo;
+
+ pg_free(val);
+
+ return result;
+}
+
+/*
+ * Get a list of all files in the data directory.
+ */
+static void
+libpq_traverse_files(rewind_source *source, process_file_callback_t callback)
+{
+ PGconn *conn = ((libpq_source *) source)->conn;
+ PGresult *res;
+ const char *sql;
+ int i;
+
+ /*
+ * Create a recursive directory listing of the whole data directory.
+ *
+ * The WITH RECURSIVE part does most of the work. The second part gets the
+ * targets of the symlinks in pg_tblspc directory.
+ *
+ * XXX: There is no backend function to get a symbolic link's target in
+ * general, so if the admin has put any custom symbolic links in the data
+ * directory, they won't be copied correctly.
+ */
+ sql =
+ "WITH RECURSIVE files (path, filename, size, isdir) AS (\n"
+ " SELECT '' AS path, filename, size, isdir FROM\n"
+ " (SELECT pg_ls_dir('.', true, false) AS filename) AS fn,\n"
+ " pg_stat_file(fn.filename, true) AS this\n"
+ " UNION ALL\n"
+ " SELECT parent.path || parent.filename || '/' AS path,\n"
+ " fn, this.size, this.isdir\n"
+ " FROM files AS parent,\n"
+ " pg_ls_dir(parent.path || parent.filename, true, false) AS fn,\n"
+ " pg_stat_file(parent.path || parent.filename || '/' || fn, true) AS this\n"
+ " WHERE parent.isdir = 't'\n"
+ ")\n"
+ "SELECT path || filename, size, isdir,\n"
+ " pg_tablespace_location(pg_tablespace.oid) AS link_target\n"
+ "FROM files\n"
+ "LEFT OUTER JOIN pg_tablespace ON files.path = 'pg_tblspc/'\n"
+ " AND oid::text = files.filename\n";
+ res = PQexec(conn, sql);
+
+ if (PQresultStatus(res) != PGRES_TUPLES_OK)
+ pg_fatal("could not fetch file list: %s",
+ PQresultErrorMessage(res));
+
+ /* sanity check the result set */
+ if (PQnfields(res) != 4)
+ pg_fatal("unexpected result set while fetching file list");
+
+ /* Read result to local variables */
+ for (i = 0; i < PQntuples(res); i++)
+ {
+ char *path;
+ int64 filesize;
+ bool isdir;
+ char *link_target;
+ file_type_t type;
+
+ if (PQgetisnull(res, i, 1))
+ {
+ /*
+ * The file was removed from the server while the query was
+ * running. Ignore it.
+ */
+ continue;
+ }
+
+ path = PQgetvalue(res, i, 0);
+ filesize = atol(PQgetvalue(res, i, 1));
+ isdir = (strcmp(PQgetvalue(res, i, 2), "t") == 0);
+ link_target = PQgetvalue(res, i, 3);
+
+ if (link_target[0])
+ type = FILE_TYPE_SYMLINK;
+ else if (isdir)
+ type = FILE_TYPE_DIRECTORY;
+ else
+ type = FILE_TYPE_REGULAR;
+
+ process_source_file(path, type, filesize, link_target);
+ }
+ PQclear(res);
+}
+
+/*
+ * Queue up a request to fetch a piece of a file from the remote system.
+ */
+static void
+libpq_queue_fetch_range(rewind_source *source, const char *path, off_t off,
+ size_t len)
+{
+ libpq_source *src = (libpq_source *) source;
+
+ /*
+ * Does this request happen to be a continuation of the previous chunk? If
+ * so, merge it with the previous one.
+ *
+ * XXX: We use pointer equality to compare the path. That's good enough
+ * for our purposes; the caller always passes the same pointer for the
+ * same filename. If it didn't, we would fail to merge requests, but it
+ * wouldn't affect correctness.
+ */
+ if (src->num_requests > 0)
+ {
+ fetch_range_request *prev = &src->request_queue[src->num_requests - 1];
+
+ if (prev->offset + prev->length == off &&
+ prev->length < MAX_CHUNK_SIZE &&
+ prev->path == path)
+ {
+ /*
+ * Extend the previous request to cover as much of this new
+ * request as possible, without exceeding MAX_CHUNK_SIZE.
+ */
+ size_t thislen;
+
+ thislen = Min(len, MAX_CHUNK_SIZE - prev->length);
+ prev->length += thislen;
+
+ off += thislen;
+ len -= thislen;
+
+ /*
+ * Fall through to create new requests for any remaining 'len'
+ * that didn't fit in the previous chunk.
+ */
+ }
+ }
+
+ /* Divide the request into pieces of MAX_CHUNK_SIZE bytes each */
+ while (len > 0)
+ {
+ int32 thislen;
+
+ /* if the queue is full, perform all the work queued up so far */
+ if (src->num_requests == MAX_CHUNKS_PER_QUERY)
+ process_queued_fetch_requests(src);
+
+ thislen = Min(len, MAX_CHUNK_SIZE);
+ src->request_queue[src->num_requests].path = path;
+ src->request_queue[src->num_requests].offset = off;
+ src->request_queue[src->num_requests].length = thislen;
+ src->num_requests++;
+
+ off += thislen;
+ len -= thislen;
+ }
+}
+
+/*
+ * Fetch all the queued chunks and write them to the target data directory.
+ */
+static void
+libpq_finish_fetch(rewind_source *source)
+{
+ process_queued_fetch_requests((libpq_source *) source);
+}
+
+static void
+process_queued_fetch_requests(libpq_source *src)
+{
+ const char *params[3];
+ PGresult *res;
+ int chunkno;
+
+ if (src->num_requests == 0)
+ return;
+
+ pg_log_debug("getting %d file chunks", src->num_requests);
+
+ /*
+ * The prepared statement, 'fetch_chunks_stmt', takes three arrays with
+ * the same length as parameters: paths, offsets and lengths. Construct
+ * the string representations of them.
+ */
+ resetStringInfo(&src->paths);
+ resetStringInfo(&src->offsets);
+ resetStringInfo(&src->lengths);
+
+ appendStringInfoChar(&src->paths, '{');
+ appendStringInfoChar(&src->offsets, '{');
+ appendStringInfoChar(&src->lengths, '{');
+ for (int i = 0; i < src->num_requests; i++)
+ {
+ fetch_range_request *rq = &src->request_queue[i];
+
+ if (i > 0)
+ {
+ appendStringInfoChar(&src->paths, ',');
+ appendStringInfoChar(&src->offsets, ',');
+ appendStringInfoChar(&src->lengths, ',');
+ }
+
+ appendArrayEscapedString(&src->paths, rq->path);
+ appendStringInfo(&src->offsets, INT64_FORMAT, (int64) rq->offset);
+ appendStringInfo(&src->lengths, INT64_FORMAT, (int64) rq->length);
+ }
+ appendStringInfoChar(&src->paths, '}');
+ appendStringInfoChar(&src->offsets, '}');
+ appendStringInfoChar(&src->lengths, '}');
+
+ /*
+ * Execute the prepared statement.
+ */
+ params[0] = src->paths.data;
+ params[1] = src->offsets.data;
+ params[2] = src->lengths.data;
+
+ if (PQsendQueryPrepared(src->conn, "fetch_chunks_stmt", 3, params, NULL, NULL, 1) != 1)
+ pg_fatal("could not send query: %s", PQerrorMessage(src->conn));
+
+ if (PQsetSingleRowMode(src->conn) != 1)
+ pg_fatal("could not set libpq connection to single row mode");
+
+ /*----
+ * The result set is of format:
+ *
+ * path text -- path in the data directory, e.g "base/1/123"
+ * begin int8 -- offset within the file
+ * chunk bytea -- file content
+ *----
+ */
+ chunkno = 0;
+ while ((res = PQgetResult(src->conn)) != NULL)
+ {
+ fetch_range_request *rq = &src->request_queue[chunkno];
+ char *filename;
+ int filenamelen;
+ int64 chunkoff;
+ int chunksize;
+ char *chunk;
+
+ switch (PQresultStatus(res))
+ {
+ case PGRES_SINGLE_TUPLE:
+ break;
+
+ case PGRES_TUPLES_OK:
+ PQclear(res);
+ continue; /* final zero-row result */
+
+ default:
+ pg_fatal("unexpected result while fetching remote files: %s",
+ PQresultErrorMessage(res));
+ }
+
+ if (chunkno > src->num_requests)
+ pg_fatal("received more data chunks than requested");
+
+ /* sanity check the result set */
+ if (PQnfields(res) != 3 || PQntuples(res) != 1)
+ pg_fatal("unexpected result set size while fetching remote files");
+
+ if (PQftype(res, 0) != TEXTOID ||
+ PQftype(res, 1) != INT8OID ||
+ PQftype(res, 2) != BYTEAOID)
+ {
+ pg_fatal("unexpected data types in result set while fetching remote files: %u %u %u",
+ PQftype(res, 0), PQftype(res, 1), PQftype(res, 2));
+ }
+
+ if (PQfformat(res, 0) != 1 &&
+ PQfformat(res, 1) != 1 &&
+ PQfformat(res, 2) != 1)
+ {
+ pg_fatal("unexpected result format while fetching remote files");
+ }
+
+ if (PQgetisnull(res, 0, 0) ||
+ PQgetisnull(res, 0, 1))
+ {
+ pg_fatal("unexpected null values in result while fetching remote files");
+ }
+
+ if (PQgetlength(res, 0, 1) != sizeof(int64))
+ pg_fatal("unexpected result length while fetching remote files");
+
+ /* Read result set to local variables */
+ memcpy(&chunkoff, PQgetvalue(res, 0, 1), sizeof(int64));
+ chunkoff = pg_ntoh64(chunkoff);
+ chunksize = PQgetlength(res, 0, 2);
+
+ filenamelen = PQgetlength(res, 0, 0);
+ filename = pg_malloc(filenamelen + 1);
+ memcpy(filename, PQgetvalue(res, 0, 0), filenamelen);
+ filename[filenamelen] = '\0';
+
+ chunk = PQgetvalue(res, 0, 2);
+
+ /*
+ * If a file has been deleted on the source, remove it on the target
+ * as well. Note that multiple unlink() calls may happen on the same
+ * file if multiple data chunks are associated with it, so we
+ * unconditionally ignore files that are already missing.
+ */
+ if (PQgetisnull(res, 0, 2))
+ {
+ pg_log_debug("received null value for chunk for file \"%s\", file has been deleted",
+ filename);
+ remove_target_file(filename, true);
+ }
+ else
+ {
+ pg_log_debug("received chunk for file \"%s\", offset %lld, size %d",
+ filename, (long long int) chunkoff, chunksize);
+
+ if (strcmp(filename, rq->path) != 0)
+ {
+ pg_fatal("received data for file \"%s\", when requested for \"%s\"",
+ filename, rq->path);
+ }
+ if (chunkoff != rq->offset)
+ pg_fatal("received data at offset %lld of file \"%s\", when requested for offset %lld",
+ (long long int) chunkoff, rq->path, (long long int) rq->offset);
+
+ /*
+ * We should not receive more data than we requested, or
+ * pg_read_binary_file() messed up. We could receive less,
+ * though, if the file was truncated in the source after we
+ * checked its size. That's OK, there should be a WAL record of
+ * the truncation, which will get replayed when you start the
+ * target system for the first time after pg_rewind has completed.
+ */
+ if (chunksize > rq->length)
+ pg_fatal("received more than requested for file \"%s\"", rq->path);
+
+ open_target_file(filename, false);
+
+ write_target_range(chunk, chunkoff, chunksize);
+ }
+
+ pg_free(filename);
+
+ PQclear(res);
+ chunkno++;
+ }
+ if (chunkno != src->num_requests)
+ pg_fatal("unexpected number of data chunks received");
+
+ src->num_requests = 0;
+}
+
+/*
+ * Escape a string to be used as element in a text array constant
+ */
+static void
+appendArrayEscapedString(StringInfo buf, const char *str)
+{
+ appendStringInfoCharMacro(buf, '\"');
+ while (*str)
+ {
+ char ch = *str;
+
+ if (ch == '"' || ch == '\\')
+ appendStringInfoCharMacro(buf, '\\');
+
+ appendStringInfoCharMacro(buf, ch);
+
+ str++;
+ }
+ appendStringInfoCharMacro(buf, '\"');
+}
+
+/*
+ * Fetch a single file as a malloc'd buffer.
+ */
+static char *
+libpq_fetch_file(rewind_source *source, const char *path, size_t *filesize)
+{
+ PGconn *conn = ((libpq_source *) source)->conn;
+ PGresult *res;
+ char *result;
+ int len;
+ const char *paramValues[1];
+
+ paramValues[0] = path;
+ res = PQexecParams(conn, "SELECT pg_read_binary_file($1)",
+ 1, NULL, paramValues, NULL, NULL, 1);
+
+ if (PQresultStatus(res) != PGRES_TUPLES_OK)
+ pg_fatal("could not fetch remote file \"%s\": %s",
+ path, PQresultErrorMessage(res));
+
+ /* sanity check the result set */
+ if (PQntuples(res) != 1 || PQgetisnull(res, 0, 0))
+ pg_fatal("unexpected result set while fetching remote file \"%s\"",
+ path);
+
+ /* Read result to local variables */
+ len = PQgetlength(res, 0, 0);
+ result = pg_malloc(len + 1);
+ memcpy(result, PQgetvalue(res, 0, 0), len);
+ result[len] = '\0';
+
+ PQclear(res);
+
+ pg_log_debug("fetched file \"%s\", length %d", path, len);
+
+ if (filesize)
+ *filesize = len;
+ return result;
+}
+
+/*
+ * Close a libpq source.
+ */
+static void
+libpq_destroy(rewind_source *source)
+{
+ libpq_source *src = (libpq_source *) source;
+
+ pfree(src->paths.data);
+ pfree(src->offsets.data);
+ pfree(src->lengths.data);
+ pfree(src);
+
+ /* NOTE: we don't close the connection here, as it was not opened by us. */
+}
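
For orientation, the sketch below shows roughly how a caller drives the rewind_source interface that this file implements. It is an illustrative sketch only: it assumes a PGconn that is already connected to the source server, borrows process_source_file() from pg_rewind's filemap code as the traversal callback (as pg_rewind.c itself does), and the file name, offset and length are made up.

/*
 * Minimal caller-side sketch of the rewind_source interface implemented
 * above.  Assumptions: 'conn' is already connected to the source server,
 * process_source_file() (pg_rewind's filemap callback) is linked in, and
 * the path/offset/length values are hypothetical.
 */
#include "postgres_fe.h"

#include "filemap.h"
#include "pg_rewind.h"
#include "rewind_source.h"

static void
fetch_one_range_sketch(PGconn *conn)
{
	rewind_source *source = init_libpq_source(conn);
	XLogRecPtr	endrec;

	/* Walk the source data directory; each entry goes through the callback. */
	source->traverse_files(source, &process_source_file);

	/* Queue a hypothetical 8 kB range; queued requests run in finish_fetch(). */
	source->queue_fetch_range(source, "base/1/1234", 0, 8192);
	source->finish_fetch(source);

	/* Note how far the source had inserted WAL at this point. */
	endrec = source->get_current_wal_insert_lsn(source);
	(void) endrec;

	/* Release the source's state; the connection itself stays open. */
	source->destroy(source);
}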