diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:46:48 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:46:48 +0000 |
commit | 311bcfc6b3acdd6fd152798c7f287ddf74fa2a98 (patch) | |
tree | 0ec307299b1dada3701e42f4ca6eda57d708261e /src/backend/port/sysv_sema.c | |
parent | Initial commit. (diff) | |
download | postgresql-15-upstream.tar.xz postgresql-15-upstream.zip |
Adding upstream version 15.4. (refs: upstream/15.4, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/backend/port/sysv_sema.c')
-rw-r--r-- | src/backend/port/sysv_sema.c | 517 |
1 files changed, 517 insertions, 0 deletions
diff --git a/src/backend/port/sysv_sema.c b/src/backend/port/sysv_sema.c new file mode 100644 index 0000000..ea3ad6d --- /dev/null +++ b/src/backend/port/sysv_sema.c @@ -0,0 +1,517 @@ +/*------------------------------------------------------------------------- + * + * sysv_sema.c + * Implement PGSemaphores using SysV semaphore facilities + * + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/port/sysv_sema.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include <signal.h> +#include <unistd.h> +#include <sys/file.h> +#include <sys/stat.h> +#ifdef HAVE_SYS_IPC_H +#include <sys/ipc.h> +#endif +#ifdef HAVE_SYS_SEM_H +#include <sys/sem.h> +#endif + +#include "miscadmin.h" +#include "storage/ipc.h" +#include "storage/pg_sema.h" +#include "storage/shmem.h" + + +typedef struct PGSemaphoreData +{ + int semId; /* semaphore set identifier */ + int semNum; /* semaphore number within set */ +} PGSemaphoreData; + +#ifndef HAVE_UNION_SEMUN +union semun +{ + int val; + struct semid_ds *buf; + unsigned short *array; +}; +#endif + +typedef key_t IpcSemaphoreKey; /* semaphore key passed to semget(2) */ +typedef int IpcSemaphoreId; /* semaphore ID returned by semget(2) */ + +/* + * SEMAS_PER_SET is the number of useful semaphores in each semaphore set + * we allocate. It must be *less than* your kernel's SEMMSL (max semaphores + * per set) parameter, which is often around 25. (Less than, because we + * allocate one extra sema in each set for identification purposes.) 
+ */ +#define SEMAS_PER_SET 16 + +#define IPCProtection (0600) /* access/modify by user only */ + +#define PGSemaMagic 537 /* must be less than SEMVMX */ + + +static PGSemaphore sharedSemas; /* array of PGSemaphoreData in shared memory */ +static int numSharedSemas; /* number of PGSemaphoreDatas used so far */ +static int maxSharedSemas; /* allocated size of PGSemaphoreData array */ +static IpcSemaphoreId *mySemaSets; /* IDs of sema sets acquired so far */ +static int numSemaSets; /* number of sema sets acquired so far */ +static int maxSemaSets; /* allocated size of mySemaSets array */ +static IpcSemaphoreKey nextSemaKey; /* next key to try using */ +static int nextSemaNumber; /* next free sem num in last sema set */ + + +static IpcSemaphoreId InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, + int numSems); +static void IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum, + int value); +static void IpcSemaphoreKill(IpcSemaphoreId semId); +static int IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum); +static pid_t IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum); +static IpcSemaphoreId IpcSemaphoreCreate(int numSems); +static void ReleaseSemaphores(int status, Datum arg); + + +/* + * InternalIpcSemaphoreCreate + * + * Attempt to create a new semaphore set with the specified key. + * Will fail (return -1) if such a set already exists. + * + * If we fail with a failure code other than collision-with-existing-set, + * print out an error and abort. Other types of errors suggest nonrecoverable + * problems. + */ +static IpcSemaphoreId +InternalIpcSemaphoreCreate(IpcSemaphoreKey semKey, int numSems) +{ + int semId; + + semId = semget(semKey, numSems, IPC_CREAT | IPC_EXCL | IPCProtection); + + if (semId < 0) + { + int saved_errno = errno; + + /* + * Fail quietly if error indicates a collision with existing set. One + * would expect EEXIST, given that we said IPC_EXCL, but perhaps we + * could get a permission violation instead? 
Also, EIDRM might occur + * if an old set is slated for destruction but not gone yet. + */ + if (saved_errno == EEXIST || saved_errno == EACCES +#ifdef EIDRM + || saved_errno == EIDRM +#endif + ) + return -1; + + /* + * Else complain and abort + */ + ereport(FATAL, + (errmsg("could not create semaphores: %m"), + errdetail("Failed system call was semget(%lu, %d, 0%o).", + (unsigned long) semKey, numSems, + IPC_CREAT | IPC_EXCL | IPCProtection), + (saved_errno == ENOSPC) ? + errhint("This error does *not* mean that you have run out of disk space. " + "It occurs when either the system limit for the maximum number of " + "semaphore sets (SEMMNI), or the system wide maximum number of " + "semaphores (SEMMNS), would be exceeded. You need to raise the " + "respective kernel parameter. Alternatively, reduce PostgreSQL's " + "consumption of semaphores by reducing its max_connections parameter.\n" + "The PostgreSQL documentation contains more information about " + "configuring your system for PostgreSQL.") : 0)); + } + + return semId; +} + +/* + * Initialize a semaphore to the specified value. + */ +static void +IpcSemaphoreInitialize(IpcSemaphoreId semId, int semNum, int value) +{ + union semun semun; + + semun.val = value; + if (semctl(semId, semNum, SETVAL, semun) < 0) + { + int saved_errno = errno; + + ereport(FATAL, + (errmsg_internal("semctl(%d, %d, SETVAL, %d) failed: %m", + semId, semNum, value), + (saved_errno == ERANGE) ? + errhint("You possibly need to raise your kernel's SEMVMX value to be at least " + "%d. Look into the PostgreSQL documentation for details.", + value) : 0)); + } +} + +/* + * IpcSemaphoreKill(semId) - removes a semaphore set + */ +static void +IpcSemaphoreKill(IpcSemaphoreId semId) +{ + union semun semun; + + semun.val = 0; /* unused, but keep compiler quiet */ + + if (semctl(semId, 0, IPC_RMID, semun) < 0) + elog(LOG, "semctl(%d, 0, IPC_RMID, ...) 
failed: %m", semId); +} + +/* Get the current value (semval) of the semaphore */ +static int +IpcSemaphoreGetValue(IpcSemaphoreId semId, int semNum) +{ + union semun dummy; /* for Solaris */ + + dummy.val = 0; /* unused */ + + return semctl(semId, semNum, GETVAL, dummy); +} + +/* Get the PID of the last process to do semop() on the semaphore */ +static pid_t +IpcSemaphoreGetLastPID(IpcSemaphoreId semId, int semNum) +{ + union semun dummy; /* for Solaris */ + + dummy.val = 0; /* unused */ + + return semctl(semId, semNum, GETPID, dummy); +} + + +/* + * Create a semaphore set with the given number of useful semaphores + * (an additional sema is actually allocated to serve as identifier). + * Dead Postgres sema sets are recycled if found, but we do not fail + * upon collision with non-Postgres sema sets. + * + * The idea here is to detect and re-use keys that may have been assigned + * by a crashed postmaster or backend. + */ +static IpcSemaphoreId +IpcSemaphoreCreate(int numSems) +{ + IpcSemaphoreId semId; + union semun semun; + PGSemaphoreData mysema; + + /* Loop till we find a free IPC key */ + for (nextSemaKey++;; nextSemaKey++) + { + pid_t creatorPID; + + /* Try to create new semaphore set */ + semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1); + if (semId >= 0) + break; /* successful create */ + + /* See if it looks to be leftover from a dead Postgres process */ + semId = semget(nextSemaKey, numSems + 1, 0); + if (semId < 0) + continue; /* failed: must be some other app's */ + if (IpcSemaphoreGetValue(semId, numSems) != PGSemaMagic) + continue; /* sema belongs to a non-Postgres app */ + + /* + * If the creator PID is my own PID or does not belong to any extant + * process, it's safe to zap it. 
+ */ + creatorPID = IpcSemaphoreGetLastPID(semId, numSems); + if (creatorPID <= 0) + continue; /* oops, GETPID failed */ + if (creatorPID != getpid()) + { + if (kill(creatorPID, 0) == 0 || errno != ESRCH) + continue; /* sema belongs to a live process */ + } + + /* + * The sema set appears to be from a dead Postgres process, or from a + * previous cycle of life in this same process. Zap it, if possible. + * This probably shouldn't fail, but if it does, assume the sema set + * belongs to someone else after all, and continue quietly. + */ + semun.val = 0; /* unused, but keep compiler quiet */ + if (semctl(semId, 0, IPC_RMID, semun) < 0) + continue; + + /* + * Now try again to create the sema set. + */ + semId = InternalIpcSemaphoreCreate(nextSemaKey, numSems + 1); + if (semId >= 0) + break; /* successful create */ + + /* + * Can only get here if some other process managed to create the same + * sema key before we did. Let him have that one, loop around to try + * next key. + */ + } + + /* + * OK, we created a new sema set. Mark it as created by this process. We + * do this by setting the spare semaphore to PGSemaMagic-1 and then + * incrementing it with semop(). That leaves it with value PGSemaMagic + * and sempid referencing this process. + */ + IpcSemaphoreInitialize(semId, numSems, PGSemaMagic - 1); + mysema.semId = semId; + mysema.semNum = numSems; + PGSemaphoreUnlock(&mysema); + + return semId; +} + + +/* + * Report amount of shared memory needed for semaphores + */ +Size +PGSemaphoreShmemSize(int maxSemas) +{ + return mul_size(maxSemas, sizeof(PGSemaphoreData)); +} + +/* + * PGReserveSemaphores --- initialize semaphore support + * + * This is called during postmaster start or shared memory reinitialization. + * It should do whatever is needed to be able to support up to maxSemas + * subsequent PGSemaphoreCreate calls. Also, if any system resources + * are acquired here or in PGSemaphoreCreate, register an on_shmem_exit + * callback to release them. 
+ * + * In the SysV implementation, we acquire semaphore sets on-demand; the + * maxSemas parameter is just used to size the arrays. There is an array + * of PGSemaphoreData structs in shared memory, and a postmaster-local array + * with one entry per SysV semaphore set, which we use for releasing the + * semaphore sets when done. (This design ensures that postmaster shutdown + * doesn't rely on the contents of shared memory, which a failed backend might + * have clobbered.) + */ +void +PGReserveSemaphores(int maxSemas) +{ + struct stat statbuf; + + /* + * We use the data directory's inode number to seed the search for free + * semaphore keys. This minimizes the odds of collision with other + * postmasters, while maximizing the odds that we will detect and clean up + * semaphores left over from a crashed postmaster in our own directory. + */ + if (stat(DataDir, &statbuf) < 0) + ereport(FATAL, + (errcode_for_file_access(), + errmsg("could not stat data directory \"%s\": %m", + DataDir))); + + /* + * We must use ShmemAllocUnlocked(), since the spinlock protecting + * ShmemAlloc() won't be ready yet. (This ordering is necessary when we + * are emulating spinlocks with semaphores.) 
+ */ + sharedSemas = (PGSemaphore) + ShmemAllocUnlocked(PGSemaphoreShmemSize(maxSemas)); + numSharedSemas = 0; + maxSharedSemas = maxSemas; + + maxSemaSets = (maxSemas + SEMAS_PER_SET - 1) / SEMAS_PER_SET; + mySemaSets = (IpcSemaphoreId *) + malloc(maxSemaSets * sizeof(IpcSemaphoreId)); + if (mySemaSets == NULL) + elog(PANIC, "out of memory"); + numSemaSets = 0; + nextSemaKey = statbuf.st_ino; + nextSemaNumber = SEMAS_PER_SET; /* force sema set alloc on 1st call */ + + on_shmem_exit(ReleaseSemaphores, 0); +} + +/* + * Release semaphores at shutdown or shmem reinitialization + * + * (called as an on_shmem_exit callback, hence funny argument list) + */ +static void +ReleaseSemaphores(int status, Datum arg) +{ + int i; + + for (i = 0; i < numSemaSets; i++) + IpcSemaphoreKill(mySemaSets[i]); + free(mySemaSets); +} + +/* + * PGSemaphoreCreate + * + * Allocate a PGSemaphore structure with initial count 1 + */ +PGSemaphore +PGSemaphoreCreate(void) +{ + PGSemaphore sema; + + /* Can't do this in a backend, because static state is postmaster's */ + Assert(!IsUnderPostmaster); + + if (nextSemaNumber >= SEMAS_PER_SET) + { + /* Time to allocate another semaphore set */ + if (numSemaSets >= maxSemaSets) + elog(PANIC, "too many semaphores created"); + mySemaSets[numSemaSets] = IpcSemaphoreCreate(SEMAS_PER_SET); + numSemaSets++; + nextSemaNumber = 0; + } + /* Use the next shared PGSemaphoreData */ + if (numSharedSemas >= maxSharedSemas) + elog(PANIC, "too many semaphores created"); + sema = &sharedSemas[numSharedSemas++]; + /* Assign the next free semaphore in the current set */ + sema->semId = mySemaSets[numSemaSets - 1]; + sema->semNum = nextSemaNumber++; + /* Initialize it to count 1 */ + IpcSemaphoreInitialize(sema->semId, sema->semNum, 1); + + return sema; +} + +/* + * PGSemaphoreReset + * + * Reset a previously-initialized PGSemaphore to have count 0 + */ +void +PGSemaphoreReset(PGSemaphore sema) +{ + IpcSemaphoreInitialize(sema->semId, sema->semNum, 0); +} + +/* + * 
PGSemaphoreLock + * + * Lock a semaphore (decrement count), blocking if count would be < 0 + */ +void +PGSemaphoreLock(PGSemaphore sema) +{ + int errStatus; + struct sembuf sops; + + sops.sem_op = -1; /* decrement */ + sops.sem_flg = 0; + sops.sem_num = sema->semNum; + + /* + * Note: if errStatus is -1 and errno == EINTR then it means we returned + * from the operation prematurely because we were sent a signal. So we + * try and lock the semaphore again. + * + * We used to check interrupts here, but that required servicing + * interrupts directly from signal handlers. Which is hard to do safely + * and portably. + */ + do + { + errStatus = semop(sema->semId, &sops, 1); + } while (errStatus < 0 && errno == EINTR); + + if (errStatus < 0) + elog(FATAL, "semop(id=%d) failed: %m", sema->semId); +} + +/* + * PGSemaphoreUnlock + * + * Unlock a semaphore (increment count) + */ +void +PGSemaphoreUnlock(PGSemaphore sema) +{ + int errStatus; + struct sembuf sops; + + sops.sem_op = 1; /* increment */ + sops.sem_flg = 0; + sops.sem_num = sema->semNum; + + /* + * Note: if errStatus is -1 and errno == EINTR then it means we returned + * from the operation prematurely because we were sent a signal. So we + * try and unlock the semaphore again. Not clear this can really happen, + * but might as well cope. + */ + do + { + errStatus = semop(sema->semId, &sops, 1); + } while (errStatus < 0 && errno == EINTR); + + if (errStatus < 0) + elog(FATAL, "semop(id=%d) failed: %m", sema->semId); +} + +/* + * PGSemaphoreTryLock + * + * Lock a semaphore only if able to do so without blocking + */ +bool +PGSemaphoreTryLock(PGSemaphore sema) +{ + int errStatus; + struct sembuf sops; + + sops.sem_op = -1; /* decrement */ + sops.sem_flg = IPC_NOWAIT; /* but don't block */ + sops.sem_num = sema->semNum; + + /* + * Note: if errStatus is -1 and errno == EINTR then it means we returned + * from the operation prematurely because we were sent a signal. So we + * try and lock the semaphore again. 
+ */ + do + { + errStatus = semop(sema->semId, &sops, 1); + } while (errStatus < 0 && errno == EINTR); + + if (errStatus < 0) + { + /* Expect EAGAIN or EWOULDBLOCK (platform-dependent) */ +#ifdef EAGAIN + if (errno == EAGAIN) + return false; /* failed to lock it */ +#endif +#if defined(EWOULDBLOCK) && (!defined(EAGAIN) || (EWOULDBLOCK != EAGAIN)) + if (errno == EWOULDBLOCK) + return false; /* failed to lock it */ +#endif + /* Otherwise we got trouble */ + elog(FATAL, "semop(id=%d) failed: %m", sema->semId); + } + + return true; +} |