diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:46:48 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-16 19:46:48 +0000 |
commit | 311bcfc6b3acdd6fd152798c7f287ddf74fa2a98 (patch) | |
tree | 0ec307299b1dada3701e42f4ca6eda57d708261e /src/backend/utils/adt/jsonpath_gram.y | |
parent | Initial commit. (diff) | |
download | postgresql-15-upstream.tar.xz postgresql-15-upstream.zip |
Adding upstream version 15.4.upstream/15.4upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/backend/utils/adt/jsonpath_gram.y')
-rw-r--r-- | src/backend/utils/adt/jsonpath_gram.y | 606 |
1 files changed, 606 insertions, 0 deletions
diff --git a/src/backend/utils/adt/jsonpath_gram.y b/src/backend/utils/adt/jsonpath_gram.y new file mode 100644 index 0000000..91e4308 --- /dev/null +++ b/src/backend/utils/adt/jsonpath_gram.y @@ -0,0 +1,606 @@ +%{ +/*------------------------------------------------------------------------- + * + * jsonpath_gram.y + * Grammar definitions for jsonpath datatype + * + * Transforms tokenized jsonpath into tree of JsonPathParseItem structs. + * + * Copyright (c) 2019-2022, PostgreSQL Global Development Group + * + * IDENTIFICATION + * src/backend/utils/adt/jsonpath_gram.y + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "catalog/pg_collation.h" +#include "fmgr.h" +#include "miscadmin.h" +#include "nodes/pg_list.h" +#include "regex/regex.h" +#include "utils/builtins.h" +#include "utils/jsonpath.h" + +/* struct JsonPathString is shared between scan and gram */ +typedef struct JsonPathString +{ + char *val; + int len; + int total; +} JsonPathString; + +union YYSTYPE; + +/* flex 2.5.4 doesn't bother with a decl for this */ +int jsonpath_yylex(union YYSTYPE *yylval_param); +int jsonpath_yyparse(JsonPathParseResult **result); +void jsonpath_yyerror(JsonPathParseResult **result, const char *message); + +static JsonPathParseItem *makeItemType(JsonPathItemType type); +static JsonPathParseItem *makeItemString(JsonPathString *s); +static JsonPathParseItem *makeItemVariable(JsonPathString *s); +static JsonPathParseItem *makeItemKey(JsonPathString *s); +static JsonPathParseItem *makeItemNumeric(JsonPathString *s); +static JsonPathParseItem *makeItemBool(bool val); +static JsonPathParseItem *makeItemBinary(JsonPathItemType type, + JsonPathParseItem *la, + JsonPathParseItem *ra); +static JsonPathParseItem *makeItemUnary(JsonPathItemType type, + JsonPathParseItem *a); +static JsonPathParseItem *makeItemList(List *list); +static JsonPathParseItem *makeIndexArray(List *list); +static JsonPathParseItem *makeAny(int first, int last); +static JsonPathParseItem *makeItemLikeRegex(JsonPathParseItem *expr, + JsonPathString *pattern, + JsonPathString *flags); + +/* + * Bison doesn't allocate anything that needs to live across parser calls, + * so we can easily have it use palloc instead of malloc. This prevents + * memory leaks if we error out during parsing. Note this only works with + * bison >= 2.0. However, in bison 1.875 the default is to use alloca() + * if possible, so there's not really much problem anyhow, at least if + * you're building with gcc. + */ +#define YYMALLOC palloc +#define YYFREE pfree + +%} + +/* BISON Declarations */ +%pure-parser +%expect 0 +%name-prefix="jsonpath_yy" +%parse-param {JsonPathParseResult **result} + +%union +{ + JsonPathString str; + List *elems; /* list of JsonPathParseItem */ + List *indexs; /* list of integers */ + JsonPathParseItem *value; + JsonPathParseResult *result; + JsonPathItemType optype; + bool boolean; + int integer; +} + +%token <str> TO_P NULL_P TRUE_P FALSE_P IS_P UNKNOWN_P EXISTS_P +%token <str> IDENT_P STRING_P NUMERIC_P INT_P VARIABLE_P +%token <str> OR_P AND_P NOT_P +%token <str> LESS_P LESSEQUAL_P EQUAL_P NOTEQUAL_P GREATEREQUAL_P GREATER_P +%token <str> ANY_P STRICT_P LAX_P LAST_P STARTS_P WITH_P LIKE_REGEX_P FLAG_P +%token <str> ABS_P SIZE_P TYPE_P FLOOR_P DOUBLE_P CEILING_P KEYVALUE_P +%token <str> DATETIME_P + +%type <result> result + +%type <value> scalar_value path_primary expr array_accessor + any_path accessor_op key predicate delimited_predicate + index_elem starts_with_initial expr_or_predicate + datetime_template opt_datetime_template + +%type <elems> accessor_expr + +%type <indexs> index_list + +%type <optype> comp_op method + +%type <boolean> mode + +%type <str> key_name + +%type <integer> any_level + +%left OR_P +%left AND_P +%right NOT_P +%left '+' '-' +%left '*' '/' '%' +%left UMINUS +%nonassoc '(' ')' + +/* Grammar follows */ +%% + +result: + mode expr_or_predicate { + *result = palloc(sizeof(JsonPathParseResult)); + (*result)->expr = $2; + (*result)->lax = $1; + (void) yynerrs; + } + | /* EMPTY */ { *result = NULL; } + ; + +expr_or_predicate: + expr { $$ = $1; } + | predicate { $$ = $1; } + ; + +mode: + STRICT_P { $$ = false; } + | LAX_P { $$ = true; } + | /* EMPTY */ { $$ = true; } + ; + +scalar_value: + STRING_P { $$ = makeItemString(&$1); } + | NULL_P { $$ = makeItemString(NULL); } + | TRUE_P { $$ = makeItemBool(true); } + | FALSE_P { $$ = makeItemBool(false); } + | NUMERIC_P { $$ = makeItemNumeric(&$1); } + | INT_P { $$ = makeItemNumeric(&$1); } + | VARIABLE_P { $$ = makeItemVariable(&$1); } + ; + +comp_op: + EQUAL_P { $$ = jpiEqual; } + | NOTEQUAL_P { $$ = jpiNotEqual; } + | LESS_P { $$ = jpiLess; } + | GREATER_P { $$ = jpiGreater; } + | LESSEQUAL_P { $$ = jpiLessOrEqual; } + | GREATEREQUAL_P { $$ = jpiGreaterOrEqual; } + ; + +delimited_predicate: + '(' predicate ')' { $$ = $2; } + | EXISTS_P '(' expr ')' { $$ = makeItemUnary(jpiExists, $3); } + ; + +predicate: + delimited_predicate { $$ = $1; } + | expr comp_op expr { $$ = makeItemBinary($2, $1, $3); } + | predicate AND_P predicate { $$ = makeItemBinary(jpiAnd, $1, $3); } + | predicate OR_P predicate { $$ = makeItemBinary(jpiOr, $1, $3); } + | NOT_P delimited_predicate { $$ = makeItemUnary(jpiNot, $2); } + | '(' predicate ')' IS_P UNKNOWN_P + { $$ = makeItemUnary(jpiIsUnknown, $2); } + | expr STARTS_P WITH_P starts_with_initial + { $$ = makeItemBinary(jpiStartsWith, $1, $4); } + | expr LIKE_REGEX_P STRING_P { $$ = makeItemLikeRegex($1, &$3, NULL); } + | expr LIKE_REGEX_P STRING_P FLAG_P STRING_P + { $$ = makeItemLikeRegex($1, &$3, &$5); } + ; + +starts_with_initial: + STRING_P { $$ = makeItemString(&$1); } + | VARIABLE_P { $$ = makeItemVariable(&$1); } + ; + +path_primary: + scalar_value { $$ = $1; } + | '$' { $$ = makeItemType(jpiRoot); } + | '@' { $$ = makeItemType(jpiCurrent); } + | LAST_P { $$ = makeItemType(jpiLast); } + ; + +accessor_expr: + path_primary { $$ = list_make1($1); } + | '(' expr ')' accessor_op { $$ = list_make2($2, $4); } + | '(' predicate ')' accessor_op { $$ = list_make2($2, $4); } + | accessor_expr accessor_op { $$ = lappend($1, $2); } + ; + +expr: + accessor_expr { $$ = makeItemList($1); } + | '(' expr ')' { $$ = $2; } + | '+' expr %prec UMINUS { $$ = makeItemUnary(jpiPlus, $2); } + | '-' expr %prec UMINUS { $$ = makeItemUnary(jpiMinus, $2); } + | expr '+' expr { $$ = makeItemBinary(jpiAdd, $1, $3); } + | expr '-' expr { $$ = makeItemBinary(jpiSub, $1, $3); } + | expr '*' expr { $$ = makeItemBinary(jpiMul, $1, $3); } + | expr '/' expr { $$ = makeItemBinary(jpiDiv, $1, $3); } + | expr '%' expr { $$ = makeItemBinary(jpiMod, $1, $3); } + ; + +index_elem: + expr { $$ = makeItemBinary(jpiSubscript, $1, NULL); } + | expr TO_P expr { $$ = makeItemBinary(jpiSubscript, $1, $3); } + ; + +index_list: + index_elem { $$ = list_make1($1); } + | index_list ',' index_elem { $$ = lappend($1, $3); } + ; + +array_accessor: + '[' '*' ']' { $$ = makeItemType(jpiAnyArray); } + | '[' index_list ']' { $$ = makeIndexArray($2); } + ; + +any_level: + INT_P { $$ = pg_strtoint32($1.val); } + | LAST_P { $$ = -1; } + ; + +any_path: + ANY_P { $$ = makeAny(0, -1); } + | ANY_P '{' any_level '}' { $$ = makeAny($3, $3); } + | ANY_P '{' any_level TO_P any_level '}' + { $$ = makeAny($3, $5); } + ; + +accessor_op: + '.' key { $$ = $2; } + | '.' '*' { $$ = makeItemType(jpiAnyKey); } + | array_accessor { $$ = $1; } + | '.' any_path { $$ = $2; } + | '.' method '(' ')' { $$ = makeItemType($2); } + | '.' DATETIME_P '(' opt_datetime_template ')' + { $$ = makeItemUnary(jpiDatetime, $4); } + | '?' '(' predicate ')' { $$ = makeItemUnary(jpiFilter, $3); } + ; + +datetime_template: + STRING_P { $$ = makeItemString(&$1); } + ; + +opt_datetime_template: + datetime_template { $$ = $1; } + | /* EMPTY */ { $$ = NULL; } + ; + +key: + key_name { $$ = makeItemKey(&$1); } + ; + +key_name: + IDENT_P + | STRING_P + | TO_P + | NULL_P + | TRUE_P + | FALSE_P + | IS_P + | UNKNOWN_P + | EXISTS_P + | STRICT_P + | LAX_P + | ABS_P + | SIZE_P + | TYPE_P + | FLOOR_P + | DOUBLE_P + | CEILING_P + | DATETIME_P + | KEYVALUE_P + | LAST_P + | STARTS_P + | WITH_P + | LIKE_REGEX_P + | FLAG_P + ; + +method: + ABS_P { $$ = jpiAbs; } + | SIZE_P { $$ = jpiSize; } + | TYPE_P { $$ = jpiType; } + | FLOOR_P { $$ = jpiFloor; } + | DOUBLE_P { $$ = jpiDouble; } + | CEILING_P { $$ = jpiCeiling; } + | KEYVALUE_P { $$ = jpiKeyValue; } + ; +%% + +/* + * The helper functions below allocate and fill JsonPathParseItem's of various + * types. + */ + +static JsonPathParseItem * +makeItemType(JsonPathItemType type) +{ + JsonPathParseItem *v = palloc(sizeof(*v)); + + CHECK_FOR_INTERRUPTS(); + + v->type = type; + v->next = NULL; + + return v; +} + +static JsonPathParseItem * +makeItemString(JsonPathString *s) +{ + JsonPathParseItem *v; + + if (s == NULL) + { + v = makeItemType(jpiNull); + } + else + { + v = makeItemType(jpiString); + v->value.string.val = s->val; + v->value.string.len = s->len; + } + + return v; +} + +static JsonPathParseItem * +makeItemVariable(JsonPathString *s) +{ + JsonPathParseItem *v; + + v = makeItemType(jpiVariable); + v->value.string.val = s->val; + v->value.string.len = s->len; + + return v; +} + +static JsonPathParseItem * +makeItemKey(JsonPathString *s) +{ + JsonPathParseItem *v; + + v = makeItemString(s); + v->type = jpiKey; + + return v; +} + +static JsonPathParseItem * +makeItemNumeric(JsonPathString *s) +{ + JsonPathParseItem *v; + + v = makeItemType(jpiNumeric); + v->value.numeric = + DatumGetNumeric(DirectFunctionCall3(numeric_in, + CStringGetDatum(s->val), + ObjectIdGetDatum(InvalidOid), + Int32GetDatum(-1))); + + return v; +} + +static JsonPathParseItem * +makeItemBool(bool val) +{ + JsonPathParseItem *v = makeItemType(jpiBool); + + v->value.boolean = val; + + return v; +} + +static JsonPathParseItem * +makeItemBinary(JsonPathItemType type, JsonPathParseItem *la, JsonPathParseItem *ra) +{ + JsonPathParseItem *v = makeItemType(type); + + v->value.args.left = la; + v->value.args.right = ra; + + return v; +} + +static JsonPathParseItem * +makeItemUnary(JsonPathItemType type, JsonPathParseItem *a) +{ + JsonPathParseItem *v; + + if (type == jpiPlus && a->type == jpiNumeric && !a->next) + return a; + + if (type == jpiMinus && a->type == jpiNumeric && !a->next) + { + v = makeItemType(jpiNumeric); + v->value.numeric = + DatumGetNumeric(DirectFunctionCall1(numeric_uminus, + NumericGetDatum(a->value.numeric))); + return v; + } + + v = makeItemType(type); + + v->value.arg = a; + + return v; +} + +static JsonPathParseItem * +makeItemList(List *list) +{ + JsonPathParseItem *head, + *end; + ListCell *cell; + + head = end = (JsonPathParseItem *) linitial(list); + + if (list_length(list) == 1) + return head; + + /* append items to the end of already existing list */ + while (end->next) + end = end->next; + + for_each_from(cell, list, 1) + { + JsonPathParseItem *c = (JsonPathParseItem *) lfirst(cell); + + end->next = c; + end = c; + } + + return head; +} + +static JsonPathParseItem * +makeIndexArray(List *list) +{ + JsonPathParseItem *v = makeItemType(jpiIndexArray); + ListCell *cell; + int i = 0; + + Assert(list_length(list) > 0); + v->value.array.nelems = list_length(list); + + v->value.array.elems = palloc(sizeof(v->value.array.elems[0]) * + v->value.array.nelems); + + foreach(cell, list) + { + JsonPathParseItem *jpi = lfirst(cell); + + Assert(jpi->type == jpiSubscript); + + v->value.array.elems[i].from = jpi->value.args.left; + v->value.array.elems[i++].to = jpi->value.args.right; + } + + return v; +} + +static JsonPathParseItem * +makeAny(int first, int last) +{ + JsonPathParseItem *v = makeItemType(jpiAny); + + v->value.anybounds.first = (first >= 0) ? first : PG_UINT32_MAX; + v->value.anybounds.last = (last >= 0) ? last : PG_UINT32_MAX; + + return v; +} + +static JsonPathParseItem * +makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern, + JsonPathString *flags) +{ + JsonPathParseItem *v = makeItemType(jpiLikeRegex); + int i; + int cflags; + + v->value.like_regex.expr = expr; + v->value.like_regex.pattern = pattern->val; + v->value.like_regex.patternlen = pattern->len; + + /* Parse the flags string, convert to bitmask. Duplicate flags are OK. */ + v->value.like_regex.flags = 0; + for (i = 0; flags && i < flags->len; i++) + { + switch (flags->val[i]) + { + case 'i': + v->value.like_regex.flags |= JSP_REGEX_ICASE; + break; + case 's': + v->value.like_regex.flags |= JSP_REGEX_DOTALL; + break; + case 'm': + v->value.like_regex.flags |= JSP_REGEX_MLINE; + break; + case 'x': + v->value.like_regex.flags |= JSP_REGEX_WSPACE; + break; + case 'q': + v->value.like_regex.flags |= JSP_REGEX_QUOTE; + break; + default: + ereport(ERROR, + (errcode(ERRCODE_SYNTAX_ERROR), + errmsg("invalid input syntax for type %s", "jsonpath"), + errdetail("Unrecognized flag character \"%.*s\" in LIKE_REGEX predicate.", + pg_mblen(flags->val + i), flags->val + i))); + break; + } + } + + /* Convert flags to what RE_compile_and_cache needs */ + cflags = jspConvertRegexFlags(v->value.like_regex.flags); + + /* check regex validity */ + (void) RE_compile_and_cache(cstring_to_text_with_len(pattern->val, + pattern->len), + cflags, DEFAULT_COLLATION_OID); + + return v; +} + +/* + * Convert from XQuery regex flags to those recognized by our regex library. + */ +int +jspConvertRegexFlags(uint32 xflags) +{ + /* By default, XQuery is very nearly the same as Spencer's AREs */ + int cflags = REG_ADVANCED; + + /* Ignore-case means the same thing, too, modulo locale issues */ + if (xflags & JSP_REGEX_ICASE) + cflags |= REG_ICASE; + + /* Per XQuery spec, if 'q' is specified then 'm', 's', 'x' are ignored */ + if (xflags & JSP_REGEX_QUOTE) + { + cflags &= ~REG_ADVANCED; + cflags |= REG_QUOTE; + } + else + { + /* Note that dotall mode is the default in POSIX */ + if (!(xflags & JSP_REGEX_DOTALL)) + cflags |= REG_NLSTOP; + if (xflags & JSP_REGEX_MLINE) + cflags |= REG_NLANCH; + + /* + * XQuery's 'x' mode is related to Spencer's expanded mode, but it's + * not really enough alike to justify treating JSP_REGEX_WSPACE as + * REG_EXPANDED. For now we treat 'x' as unimplemented; perhaps in + * future we'll modify the regex library to have an option for + * XQuery-style ignore-whitespace mode. + */ + if (xflags & JSP_REGEX_WSPACE) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("XQuery \"x\" flag (expanded regular expressions) is not implemented"))); + } + + /* + * We'll never need sub-match details at execution. While + * RE_compile_and_execute would set this flag anyway, force it on here to + * ensure that the regex cache entries created by makeItemLikeRegex are + * useful. + */ + cflags |= REG_NOSUB; + + return cflags; +} + +/* + * jsonpath_scan.l is compiled as part of jsonpath_gram.y. Currently, this is + * unavoidable because jsonpath_gram does not create a .h file to export its + * token symbols. If these files ever grow large enough to be worth compiling + * separately, that could be fixed; but for now it seems like useless + * complication. + */ + +#include "jsonpath_scan.c" |