diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:46:48 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:46:48 +0000 |
commit | 311bcfc6b3acdd6fd152798c7f287ddf74fa2a98 (patch) | |
tree | 0ec307299b1dada3701e42f4ca6eda57d708261e /src/backend/utils/adt/tsquery_rewrite.c | |
parent | Initial commit. (diff) | |
download | postgresql-15-upstream.tar.xz postgresql-15-upstream.zip |
Adding upstream version 15.4.upstream/15.4upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/backend/utils/adt/tsquery_rewrite.c')
-rw-r--r-- | src/backend/utils/adt/tsquery_rewrite.c | 462 |
1 files changed, 462 insertions, 0 deletions
diff --git a/src/backend/utils/adt/tsquery_rewrite.c b/src/backend/utils/adt/tsquery_rewrite.c new file mode 100644 index 0000000..3bef5d7 --- /dev/null +++ b/src/backend/utils/adt/tsquery_rewrite.c @@ -0,0 +1,462 @@ +/*------------------------------------------------------------------------- + * + * tsquery_rewrite.c + * Utilities for reconstructing tsquery + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/tsquery_rewrite.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "catalog/pg_type.h" +#include "executor/spi.h" +#include "miscadmin.h" +#include "tsearch/ts_utils.h" +#include "utils/builtins.h" + + +/* + * If "node" is equal to "ex", return a copy of "subs" instead. + * If "ex" matches a subset of node's children, return a modified version + * of "node" in which those children are replaced with a copy of "subs". + * Otherwise return "node" unmodified. + * + * The QTN_NOCHANGE bit is set in successfully modified nodes, so that + * we won't uselessly recurse into them. + * Also, set *isfind true if we make a replacement. + */ +static QTNode * +findeq(QTNode *node, QTNode *ex, QTNode *subs, bool *isfind) +{ + /* Can't match unless signature matches and node type matches. */ + if ((node->sign & ex->sign) != ex->sign || + node->valnode->type != ex->valnode->type) + return node; + + /* Ignore nodes marked NOCHANGE, too. */ + if (node->flags & QTN_NOCHANGE) + return node; + + if (node->valnode->type == QI_OPR) + { + /* Must be same operator. */ + if (node->valnode->qoperator.oper != ex->valnode->qoperator.oper) + return node; + + if (node->nchild == ex->nchild) + { + /* + * Simple case: when same number of children, match if equal. + * (This is reliable when the children were sorted earlier.) + */ + if (QTNEq(node, ex)) + { + /* Match; delete node and return a copy of subs instead. */ + QTNFree(node); + if (subs) + { + node = QTNCopy(subs); + node->flags |= QTN_NOCHANGE; + } + else + node = NULL; + *isfind = true; + } + } + else if (node->nchild > ex->nchild && ex->nchild > 0) + { + /* + * AND and OR are commutative/associative, so we should check if a + * subset of the children match. For example, if node is A|B|C, + * and ex is B|C, we have a match after we notionally convert node + * to A|(B|C). This does not work for NOT or PHRASE nodes, but we + * can't get here for those node types because they have a fixed + * number of children. + * + * Because we expect that the children are sorted, it suffices to + * make one pass through the two lists to find the matches. + */ + bool *matched; + int nmatched; + int i, + j; + + /* Assert that the subset rule is OK */ + Assert(node->valnode->qoperator.oper == OP_AND || + node->valnode->qoperator.oper == OP_OR); + + /* matched[] will record which children of node matched */ + matched = (bool *) palloc0(node->nchild * sizeof(bool)); + nmatched = 0; + i = j = 0; + while (i < node->nchild && j < ex->nchild) + { + int cmp = QTNodeCompare(node->child[i], ex->child[j]); + + if (cmp == 0) + { + /* match! */ + matched[i] = true; + nmatched++; + i++, j++; + } + else if (cmp < 0) + { + /* node->child[i] has no match, ignore it */ + i++; + } + else + { + /* ex->child[j] has no match; we can give up immediately */ + break; + } + } + + if (nmatched == ex->nchild) + { + /* collapse out the matched children of node */ + j = 0; + for (i = 0; i < node->nchild; i++) + { + if (matched[i]) + QTNFree(node->child[i]); + else + node->child[j++] = node->child[i]; + } + + /* and instead insert a copy of subs */ + if (subs) + { + subs = QTNCopy(subs); + subs->flags |= QTN_NOCHANGE; + node->child[j++] = subs; + } + + node->nchild = j; + + /* + * At this point we might have a node with zero or one child, + * which should be simplified. But we leave it to our caller + * (dofindsubquery) to take care of that. + */ + + /* + * Re-sort the node to put new child in the right place. This + * is a bit bogus, because it won't matter for findsubquery's + * remaining processing, and it's insufficient to prepare the + * tree for another search (we would need to re-flatten as + * well, and we don't want to do that because we'd lose the + * QTN_NOCHANGE marking on the new child). But it's needed to + * keep the results the same as the regression tests expect. + */ + QTNSort(node); + + *isfind = true; + } + + pfree(matched); + } + } + else + { + Assert(node->valnode->type == QI_VAL); + + if (node->valnode->qoperand.valcrc != ex->valnode->qoperand.valcrc) + return node; + else if (QTNEq(node, ex)) + { + QTNFree(node); + if (subs) + { + node = QTNCopy(subs); + node->flags |= QTN_NOCHANGE; + } + else + { + node = NULL; + } + *isfind = true; + } + } + + return node; +} + +/* + * Recursive guts of findsubquery(): attempt to replace "ex" with "subs" + * at the root node, and if we failed to do so, recursively match against + * child nodes. + * + * Delete any void subtrees resulting from the replacement. + * In the following example '5' is replaced by empty operand: + * + * AND -> 6 + * / \ + * 5 OR + * / \ + * 6 5 + */ +static QTNode * +dofindsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind) +{ + /* since this function recurses, it could be driven to stack overflow. */ + check_stack_depth(); + + /* also, since it's a bit expensive, let's check for query cancel. */ + CHECK_FOR_INTERRUPTS(); + + /* match at the node itself */ + root = findeq(root, ex, subs, isfind); + + /* unless we matched here, consider matches at child nodes */ + if (root && (root->flags & QTN_NOCHANGE) == 0 && + root->valnode->type == QI_OPR) + { + int i, + j = 0; + + /* + * Any subtrees that are replaced by NULL must be dropped from the + * tree. + */ + for (i = 0; i < root->nchild; i++) + { + root->child[j] = dofindsubquery(root->child[i], ex, subs, isfind); + if (root->child[j]) + j++; + } + + root->nchild = j; + + /* + * If we have just zero or one remaining child node, simplify out this + * operator node. + */ + if (root->nchild == 0) + { + QTNFree(root); + root = NULL; + } + else if (root->nchild == 1 && root->valnode->qoperator.oper != OP_NOT) + { + QTNode *nroot = root->child[0]; + + pfree(root); + root = nroot; + } + } + + return root; +} + +/* + * Substitute "subs" for "ex" throughout the QTNode tree at root. + * + * If isfind isn't NULL, set *isfind to show whether we made any substitution. + * + * Both "root" and "ex" must have been through QTNTernary and QTNSort + * to ensure reliable matching. + */ +QTNode * +findsubquery(QTNode *root, QTNode *ex, QTNode *subs, bool *isfind) +{ + bool DidFind = false; + + root = dofindsubquery(root, ex, subs, &DidFind); + + if (isfind) + *isfind = DidFind; + + return root; +} + +Datum +tsquery_rewrite_query(PG_FUNCTION_ARGS) +{ + TSQuery query = PG_GETARG_TSQUERY_COPY(0); + text *in = PG_GETARG_TEXT_PP(1); + TSQuery rewrited = query; + MemoryContext outercontext = CurrentMemoryContext; + MemoryContext oldcontext; + QTNode *tree; + char *buf; + SPIPlanPtr plan; + Portal portal; + bool isnull; + + if (query->size == 0) + { + PG_FREE_IF_COPY(in, 1); + PG_RETURN_POINTER(rewrited); + } + + tree = QT2QTN(GETQUERY(query), GETOPERAND(query)); + QTNTernary(tree); + QTNSort(tree); + + buf = text_to_cstring(in); + + SPI_connect(); + + if ((plan = SPI_prepare(buf, 0, NULL)) == NULL) + elog(ERROR, "SPI_prepare(\"%s\") failed", buf); + + if ((portal = SPI_cursor_open(NULL, plan, NULL, NULL, true)) == NULL) + elog(ERROR, "SPI_cursor_open(\"%s\") failed", buf); + + SPI_cursor_fetch(portal, true, 100); + + if (SPI_tuptable == NULL || + SPI_tuptable->tupdesc->natts != 2 || + SPI_gettypeid(SPI_tuptable->tupdesc, 1) != TSQUERYOID || + SPI_gettypeid(SPI_tuptable->tupdesc, 2) != TSQUERYOID) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("ts_rewrite query must return two tsquery columns"))); + + while (SPI_processed > 0 && tree) + { + uint64 i; + + for (i = 0; i < SPI_processed && tree; i++) + { + Datum qdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 1, &isnull); + Datum sdata; + + if (isnull) + continue; + + sdata = SPI_getbinval(SPI_tuptable->vals[i], SPI_tuptable->tupdesc, 2, &isnull); + + if (!isnull) + { + TSQuery qtex = DatumGetTSQuery(qdata); + TSQuery qtsubs = DatumGetTSQuery(sdata); + QTNode *qex, + *qsubs = NULL; + + if (qtex->size == 0) + { + if (qtex != (TSQuery) DatumGetPointer(qdata)) + pfree(qtex); + if (qtsubs != (TSQuery) DatumGetPointer(sdata)) + pfree(qtsubs); + continue; + } + + qex = QT2QTN(GETQUERY(qtex), GETOPERAND(qtex)); + + QTNTernary(qex); + QTNSort(qex); + + if (qtsubs->size) + qsubs = QT2QTN(GETQUERY(qtsubs), GETOPERAND(qtsubs)); + + oldcontext = MemoryContextSwitchTo(outercontext); + tree = findsubquery(tree, qex, qsubs, NULL); + MemoryContextSwitchTo(oldcontext); + + QTNFree(qex); + if (qtex != (TSQuery) DatumGetPointer(qdata)) + pfree(qtex); + QTNFree(qsubs); + if (qtsubs != (TSQuery) DatumGetPointer(sdata)) + pfree(qtsubs); + + if (tree) + { + /* ready the tree for another pass */ + QTNClearFlags(tree, QTN_NOCHANGE); + QTNTernary(tree); + QTNSort(tree); + } + } + } + + SPI_freetuptable(SPI_tuptable); + SPI_cursor_fetch(portal, true, 100); + } + + SPI_freetuptable(SPI_tuptable); + SPI_cursor_close(portal); + SPI_freeplan(plan); + SPI_finish(); + + if (tree) + { + QTNBinary(tree); + rewrited = QTN2QT(tree); + QTNFree(tree); + PG_FREE_IF_COPY(query, 0); + } + else + { + SET_VARSIZE(rewrited, HDRSIZETQ); + rewrited->size = 0; + } + + pfree(buf); + PG_FREE_IF_COPY(in, 1); + PG_RETURN_POINTER(rewrited); +} + +Datum +tsquery_rewrite(PG_FUNCTION_ARGS) +{ + TSQuery query = PG_GETARG_TSQUERY_COPY(0); + TSQuery ex = PG_GETARG_TSQUERY(1); + TSQuery subst = PG_GETARG_TSQUERY(2); + TSQuery rewrited = query; + QTNode *tree, + *qex, + *subs = NULL; + + if (query->size == 0 || ex->size == 0) + { + PG_FREE_IF_COPY(ex, 1); + PG_FREE_IF_COPY(subst, 2); + PG_RETURN_POINTER(rewrited); + } + + tree = QT2QTN(GETQUERY(query), GETOPERAND(query)); + QTNTernary(tree); + QTNSort(tree); + + qex = QT2QTN(GETQUERY(ex), GETOPERAND(ex)); + QTNTernary(qex); + QTNSort(qex); + + if (subst->size) + subs = QT2QTN(GETQUERY(subst), GETOPERAND(subst)); + + tree = findsubquery(tree, qex, subs, NULL); + + QTNFree(qex); + QTNFree(subs); + + if (!tree) + { + SET_VARSIZE(rewrited, HDRSIZETQ); + rewrited->size = 0; + PG_FREE_IF_COPY(ex, 1); + PG_FREE_IF_COPY(subst, 2); + PG_RETURN_POINTER(rewrited); + } + else + { + QTNBinary(tree); + rewrited = QTN2QT(tree); + QTNFree(tree); + } + + PG_FREE_IF_COPY(query, 0); + PG_FREE_IF_COPY(ex, 1); + PG_FREE_IF_COPY(subst, 2); + PG_RETURN_POINTER(rewrited); +} |