diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:15:05 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:15:05 +0000 |
commit | 46651ce6fe013220ed397add242004d764fc0153 (patch) | |
tree | 6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/bin/pg_rewind/libpq_source.c | |
parent | Initial commit. (diff) | |
download | postgresql-14-46651ce6fe013220ed397add242004d764fc0153.tar.xz postgresql-14-46651ce6fe013220ed397add242004d764fc0153.zip |
Adding upstream version 14.5.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/bin/pg_rewind/libpq_source.c')
-rw-r--r-- | src/bin/pg_rewind/libpq_source.c | 643 |
1 file changed, 643 insertions(+), 0 deletions(-)
diff --git a/src/bin/pg_rewind/libpq_source.c b/src/bin/pg_rewind/libpq_source.c new file mode 100644 index 0000000..8e0783f --- /dev/null +++ b/src/bin/pg_rewind/libpq_source.c @@ -0,0 +1,643 @@ +/*------------------------------------------------------------------------- + * + * libpq_source.c + * Functions for fetching files from a remote server via libpq. + * + * Copyright (c) 2013-2021, PostgreSQL Global Development Group + * + *------------------------------------------------------------------------- + */ +#include "postgres_fe.h" + +#include "catalog/pg_type_d.h" +#include "common/connect.h" +#include "datapagemap.h" +#include "file_ops.h" +#include "filemap.h" +#include "lib/stringinfo.h" +#include "pg_rewind.h" +#include "port/pg_bswap.h" +#include "rewind_source.h" + +/* + * Files are fetched MAX_CHUNK_SIZE bytes at a time, and with a + * maximum of MAX_CHUNKS_PER_QUERY chunks in a single query. + */ +#define MAX_CHUNK_SIZE (1024 * 1024) +#define MAX_CHUNKS_PER_QUERY 1000 + +/* represents a request to fetch a piece of a file from the source */ +typedef struct +{ + const char *path; /* path relative to data directory root */ + off_t offset; + size_t length; +} fetch_range_request; + +typedef struct +{ + rewind_source common; /* common interface functions */ + + PGconn *conn; + + /* + * Queue of chunks that have been requested with the queue_fetch_range() + * function, but have not been fetched from the remote server yet. 
+ */ + int num_requests; + fetch_range_request request_queue[MAX_CHUNKS_PER_QUERY]; + + /* temporary space for process_queued_fetch_requests() */ + StringInfoData paths; + StringInfoData offsets; + StringInfoData lengths; +} libpq_source; + +static void init_libpq_conn(PGconn *conn); +static char *run_simple_query(PGconn *conn, const char *sql); +static void run_simple_command(PGconn *conn, const char *sql); +static void appendArrayEscapedString(StringInfo buf, const char *str); + +static void process_queued_fetch_requests(libpq_source *src); + +/* public interface functions */ +static void libpq_traverse_files(rewind_source *source, + process_file_callback_t callback); +static void libpq_queue_fetch_range(rewind_source *source, const char *path, + off_t off, size_t len); +static void libpq_finish_fetch(rewind_source *source); +static char *libpq_fetch_file(rewind_source *source, const char *path, + size_t *filesize); +static XLogRecPtr libpq_get_current_wal_insert_lsn(rewind_source *source); +static void libpq_destroy(rewind_source *source); + +/* + * Create a new libpq source. + * + * The caller has already established the connection, but should not try + * to use it while the source is active. + */ +rewind_source * +init_libpq_source(PGconn *conn) +{ + libpq_source *src; + + init_libpq_conn(conn); + + src = pg_malloc0(sizeof(libpq_source)); + + src->common.traverse_files = libpq_traverse_files; + src->common.fetch_file = libpq_fetch_file; + src->common.queue_fetch_range = libpq_queue_fetch_range; + src->common.finish_fetch = libpq_finish_fetch; + src->common.get_current_wal_insert_lsn = libpq_get_current_wal_insert_lsn; + src->common.destroy = libpq_destroy; + + src->conn = conn; + + initStringInfo(&src->paths); + initStringInfo(&src->offsets); + initStringInfo(&src->lengths); + + return &src->common; +} + +/* + * Initialize a libpq connection for use. 
+ */ +static void +init_libpq_conn(PGconn *conn) +{ + PGresult *res; + char *str; + + /* disable all types of timeouts */ + run_simple_command(conn, "SET statement_timeout = 0"); + run_simple_command(conn, "SET lock_timeout = 0"); + run_simple_command(conn, "SET idle_in_transaction_session_timeout = 0"); + + /* + * we don't intend to do any updates, put the connection in read-only mode + * to keep us honest + */ + run_simple_command(conn, "SET default_transaction_read_only = on"); + + /* secure search_path */ + res = PQexec(conn, ALWAYS_SECURE_SEARCH_PATH_SQL); + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pg_fatal("could not clear search_path: %s", + PQresultErrorMessage(res)); + PQclear(res); + + /* + * Also check that full_page_writes is enabled. We can get torn pages if + * a page is modified while we read it with pg_read_binary_file(), and we + * rely on full page images to fix them. + */ + str = run_simple_query(conn, "SHOW full_page_writes"); + if (strcmp(str, "on") != 0) + pg_fatal("full_page_writes must be enabled in the source server"); + pg_free(str); + + /* Prepare a statement we'll use to fetch files */ + res = PQprepare(conn, "fetch_chunks_stmt", + "SELECT path, begin,\n" + " pg_read_binary_file(path, begin, len, true) AS chunk\n" + "FROM unnest ($1::text[], $2::int8[], $3::int4[]) as x(path, begin, len)", + 3, NULL); + + if (PQresultStatus(res) != PGRES_COMMAND_OK) + pg_fatal("could not prepare statement to fetch file contents: %s", + PQresultErrorMessage(res)); + PQclear(res); +} + +/* + * Run a query that returns a single value. + * + * The result should be pg_free'd after use. 
+ */ +static char * +run_simple_query(PGconn *conn, const char *sql) +{ + PGresult *res; + char *result; + + res = PQexec(conn, sql); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pg_fatal("error running query (%s) on source server: %s", + sql, PQresultErrorMessage(res)); + + /* sanity check the result set */ + if (PQnfields(res) != 1 || PQntuples(res) != 1 || PQgetisnull(res, 0, 0)) + pg_fatal("unexpected result set from query"); + + result = pg_strdup(PQgetvalue(res, 0, 0)); + + PQclear(res); + + return result; +} + +/* + * Run a command. + * + * In the event of a failure, exit immediately. + */ +static void +run_simple_command(PGconn *conn, const char *sql) +{ + PGresult *res; + + res = PQexec(conn, sql); + + if (PQresultStatus(res) != PGRES_COMMAND_OK) + pg_fatal("error running query (%s) in source server: %s", + sql, PQresultErrorMessage(res)); + + PQclear(res); +} + +/* + * Call the pg_current_wal_insert_lsn() function in the remote system. + */ +static XLogRecPtr +libpq_get_current_wal_insert_lsn(rewind_source *source) +{ + PGconn *conn = ((libpq_source *) source)->conn; + XLogRecPtr result; + uint32 hi; + uint32 lo; + char *val; + + val = run_simple_query(conn, "SELECT pg_current_wal_insert_lsn()"); + + if (sscanf(val, "%X/%X", &hi, &lo) != 2) + pg_fatal("unrecognized result \"%s\" for current WAL insert location", val); + + result = ((uint64) hi) << 32 | lo; + + pg_free(val); + + return result; +} + +/* + * Get a list of all files in the data directory. + */ +static void +libpq_traverse_files(rewind_source *source, process_file_callback_t callback) +{ + PGconn *conn = ((libpq_source *) source)->conn; + PGresult *res; + const char *sql; + int i; + + /* + * Create a recursive directory listing of the whole data directory. + * + * The WITH RECURSIVE part does most of the work. The second part gets the + * targets of the symlinks in pg_tblspc directory. 
+ * + * XXX: There is no backend function to get a symbolic link's target in + * general, so if the admin has put any custom symbolic links in the data + * directory, they won't be copied correctly. + */ + sql = + "WITH RECURSIVE files (path, filename, size, isdir) AS (\n" + " SELECT '' AS path, filename, size, isdir FROM\n" + " (SELECT pg_ls_dir('.', true, false) AS filename) AS fn,\n" + " pg_stat_file(fn.filename, true) AS this\n" + " UNION ALL\n" + " SELECT parent.path || parent.filename || '/' AS path,\n" + " fn, this.size, this.isdir\n" + " FROM files AS parent,\n" + " pg_ls_dir(parent.path || parent.filename, true, false) AS fn,\n" + " pg_stat_file(parent.path || parent.filename || '/' || fn, true) AS this\n" + " WHERE parent.isdir = 't'\n" + ")\n" + "SELECT path || filename, size, isdir,\n" + " pg_tablespace_location(pg_tablespace.oid) AS link_target\n" + "FROM files\n" + "LEFT OUTER JOIN pg_tablespace ON files.path = 'pg_tblspc/'\n" + " AND oid::text = files.filename\n"; + res = PQexec(conn, sql); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pg_fatal("could not fetch file list: %s", + PQresultErrorMessage(res)); + + /* sanity check the result set */ + if (PQnfields(res) != 4) + pg_fatal("unexpected result set while fetching file list"); + + /* Read result to local variables */ + for (i = 0; i < PQntuples(res); i++) + { + char *path; + int64 filesize; + bool isdir; + char *link_target; + file_type_t type; + + if (PQgetisnull(res, i, 1)) + { + /* + * The file was removed from the server while the query was + * running. Ignore it. 
+ */ + continue; + } + + path = PQgetvalue(res, i, 0); + filesize = atol(PQgetvalue(res, i, 1)); + isdir = (strcmp(PQgetvalue(res, i, 2), "t") == 0); + link_target = PQgetvalue(res, i, 3); + + if (link_target[0]) + type = FILE_TYPE_SYMLINK; + else if (isdir) + type = FILE_TYPE_DIRECTORY; + else + type = FILE_TYPE_REGULAR; + + process_source_file(path, type, filesize, link_target); + } + PQclear(res); +} + +/* + * Queue up a request to fetch a piece of a file from remote system. + */ +static void +libpq_queue_fetch_range(rewind_source *source, const char *path, off_t off, + size_t len) +{ + libpq_source *src = (libpq_source *) source; + + /* + * Does this request happen to be a continuation of the previous chunk? If + * so, merge it with the previous one. + * + * XXX: We use pointer equality to compare the path. That's good enough + * for our purposes; the caller always passes the same pointer for the + * same filename. If it didn't, we would fail to merge requests, but it + * wouldn't affect correctness. + */ + if (src->num_requests > 0) + { + fetch_range_request *prev = &src->request_queue[src->num_requests - 1]; + + if (prev->offset + prev->length == off && + prev->length < MAX_CHUNK_SIZE && + prev->path == path) + { + /* + * Extend the previous request to cover as much of this new + * request as possible, without exceeding MAX_CHUNK_SIZE. + */ + size_t thislen; + + thislen = Min(len, MAX_CHUNK_SIZE - prev->length); + prev->length += thislen; + + off += thislen; + len -= thislen; + + /* + * Fall through to create new requests for any remaining 'len' + * that didn't fit in the previous chunk. 
+ */ + } + } + + /* Divide the request into pieces of MAX_CHUNK_SIZE bytes each */ + while (len > 0) + { + int32 thislen; + + /* if the queue is full, perform all the work queued up so far */ + if (src->num_requests == MAX_CHUNKS_PER_QUERY) + process_queued_fetch_requests(src); + + thislen = Min(len, MAX_CHUNK_SIZE); + src->request_queue[src->num_requests].path = path; + src->request_queue[src->num_requests].offset = off; + src->request_queue[src->num_requests].length = thislen; + src->num_requests++; + + off += thislen; + len -= thislen; + } +} + +/* + * Fetch all the queued chunks and write them to the target data directory. + */ +static void +libpq_finish_fetch(rewind_source *source) +{ + process_queued_fetch_requests((libpq_source *) source); +} + +static void +process_queued_fetch_requests(libpq_source *src) +{ + const char *params[3]; + PGresult *res; + int chunkno; + + if (src->num_requests == 0) + return; + + pg_log_debug("getting %d file chunks", src->num_requests); + + /* + * The prepared statement, 'fetch_chunks_stmt', takes three arrays with + * the same length as parameters: paths, offsets and lengths. Construct + * the string representations of them. 
+ */ + resetStringInfo(&src->paths); + resetStringInfo(&src->offsets); + resetStringInfo(&src->lengths); + + appendStringInfoChar(&src->paths, '{'); + appendStringInfoChar(&src->offsets, '{'); + appendStringInfoChar(&src->lengths, '{'); + for (int i = 0; i < src->num_requests; i++) + { + fetch_range_request *rq = &src->request_queue[i]; + + if (i > 0) + { + appendStringInfoChar(&src->paths, ','); + appendStringInfoChar(&src->offsets, ','); + appendStringInfoChar(&src->lengths, ','); + } + + appendArrayEscapedString(&src->paths, rq->path); + appendStringInfo(&src->offsets, INT64_FORMAT, (int64) rq->offset); + appendStringInfo(&src->lengths, INT64_FORMAT, (int64) rq->length); + } + appendStringInfoChar(&src->paths, '}'); + appendStringInfoChar(&src->offsets, '}'); + appendStringInfoChar(&src->lengths, '}'); + + /* + * Execute the prepared statement. + */ + params[0] = src->paths.data; + params[1] = src->offsets.data; + params[2] = src->lengths.data; + + if (PQsendQueryPrepared(src->conn, "fetch_chunks_stmt", 3, params, NULL, NULL, 1) != 1) + pg_fatal("could not send query: %s", PQerrorMessage(src->conn)); + + if (PQsetSingleRowMode(src->conn) != 1) + pg_fatal("could not set libpq connection to single row mode"); + + /*---- + * The result set is of format: + * + * path text -- path in the data directory, e.g "base/1/123" + * begin int8 -- offset within the file + * chunk bytea -- file content + *---- + */ + chunkno = 0; + while ((res = PQgetResult(src->conn)) != NULL) + { + fetch_range_request *rq = &src->request_queue[chunkno]; + char *filename; + int filenamelen; + int64 chunkoff; + int chunksize; + char *chunk; + + switch (PQresultStatus(res)) + { + case PGRES_SINGLE_TUPLE: + break; + + case PGRES_TUPLES_OK: + PQclear(res); + continue; /* final zero-row result */ + + default: + pg_fatal("unexpected result while fetching remote files: %s", + PQresultErrorMessage(res)); + } + + if (chunkno > src->num_requests) + pg_fatal("received more data chunks than requested"); + 
+ /* sanity check the result set */ + if (PQnfields(res) != 3 || PQntuples(res) != 1) + pg_fatal("unexpected result set size while fetching remote files"); + + if (PQftype(res, 0) != TEXTOID || + PQftype(res, 1) != INT8OID || + PQftype(res, 2) != BYTEAOID) + { + pg_fatal("unexpected data types in result set while fetching remote files: %u %u %u", + PQftype(res, 0), PQftype(res, 1), PQftype(res, 2)); + } + + if (PQfformat(res, 0) != 1 && + PQfformat(res, 1) != 1 && + PQfformat(res, 2) != 1) + { + pg_fatal("unexpected result format while fetching remote files"); + } + + if (PQgetisnull(res, 0, 0) || + PQgetisnull(res, 0, 1)) + { + pg_fatal("unexpected null values in result while fetching remote files"); + } + + if (PQgetlength(res, 0, 1) != sizeof(int64)) + pg_fatal("unexpected result length while fetching remote files"); + + /* Read result set to local variables */ + memcpy(&chunkoff, PQgetvalue(res, 0, 1), sizeof(int64)); + chunkoff = pg_ntoh64(chunkoff); + chunksize = PQgetlength(res, 0, 2); + + filenamelen = PQgetlength(res, 0, 0); + filename = pg_malloc(filenamelen + 1); + memcpy(filename, PQgetvalue(res, 0, 0), filenamelen); + filename[filenamelen] = '\0'; + + chunk = PQgetvalue(res, 0, 2); + + /* + * If a file has been deleted on the source, remove it on the target + * as well. Note that multiple unlink() calls may happen on the same + * file if multiple data chunks are associated with it, hence ignore + * unconditionally anything missing. 
+ */ + if (PQgetisnull(res, 0, 2)) + { + pg_log_debug("received null value for chunk for file \"%s\", file has been deleted", + filename); + remove_target_file(filename, true); + } + else + { + pg_log_debug("received chunk for file \"%s\", offset %lld, size %d", + filename, (long long int) chunkoff, chunksize); + + if (strcmp(filename, rq->path) != 0) + { + pg_fatal("received data for file \"%s\", when requested for \"%s\"", + filename, rq->path); + } + if (chunkoff != rq->offset) + pg_fatal("received data at offset %lld of file \"%s\", when requested for offset %lld", + (long long int) chunkoff, rq->path, (long long int) rq->offset); + + /* + * We should not receive more data than we requested, or + * pg_read_binary_file() messed up. We could receive less, + * though, if the file was truncated in the source after we + * checked its size. That's OK, there should be a WAL record of + * the truncation, which will get replayed when you start the + * target system for the first time after pg_rewind has completed. + */ + if (chunksize > rq->length) + pg_fatal("received more than requested for file \"%s\"", rq->path); + + open_target_file(filename, false); + + write_target_range(chunk, chunkoff, chunksize); + } + + pg_free(filename); + + PQclear(res); + chunkno++; + } + if (chunkno != src->num_requests) + pg_fatal("unexpected number of data chunks received"); + + src->num_requests = 0; +} + +/* + * Escape a string to be used as element in a text array constant + */ +static void +appendArrayEscapedString(StringInfo buf, const char *str) +{ + appendStringInfoCharMacro(buf, '\"'); + while (*str) + { + char ch = *str; + + if (ch == '"' || ch == '\\') + appendStringInfoCharMacro(buf, '\\'); + + appendStringInfoCharMacro(buf, ch); + + str++; + } + appendStringInfoCharMacro(buf, '\"'); +} + +/* + * Fetch a single file as a malloc'd buffer. 
+ */ +static char * +libpq_fetch_file(rewind_source *source, const char *path, size_t *filesize) +{ + PGconn *conn = ((libpq_source *) source)->conn; + PGresult *res; + char *result; + int len; + const char *paramValues[1]; + + paramValues[0] = path; + res = PQexecParams(conn, "SELECT pg_read_binary_file($1)", + 1, NULL, paramValues, NULL, NULL, 1); + + if (PQresultStatus(res) != PGRES_TUPLES_OK) + pg_fatal("could not fetch remote file \"%s\": %s", + path, PQresultErrorMessage(res)); + + /* sanity check the result set */ + if (PQntuples(res) != 1 || PQgetisnull(res, 0, 0)) + pg_fatal("unexpected result set while fetching remote file \"%s\"", + path); + + /* Read result to local variables */ + len = PQgetlength(res, 0, 0); + result = pg_malloc(len + 1); + memcpy(result, PQgetvalue(res, 0, 0), len); + result[len] = '\0'; + + PQclear(res); + + pg_log_debug("fetched file \"%s\", length %d", path, len); + + if (filesize) + *filesize = len; + return result; +} + +/* + * Close a libpq source. + */ +static void +libpq_destroy(rewind_source *source) +{ + libpq_source *src = (libpq_source *) source; + + pfree(src->paths.data); + pfree(src->offsets.data); + pfree(src->lengths.data); + pfree(src); + + /* NOTE: we don't close the connection here, as it was not opened by us. */ +} |