summaryrefslogtreecommitdiffstats
path: root/src/backend/utils/adt/jsonpath_gram.y
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/jsonpath_gram.y')
-rw-r--r--src/backend/utils/adt/jsonpath_gram.y606
1 files changed, 606 insertions, 0 deletions
diff --git a/src/backend/utils/adt/jsonpath_gram.y b/src/backend/utils/adt/jsonpath_gram.y
new file mode 100644
index 0000000..91e4308
--- /dev/null
+++ b/src/backend/utils/adt/jsonpath_gram.y
@@ -0,0 +1,606 @@
+%{
+/*-------------------------------------------------------------------------
+ *
+ * jsonpath_gram.y
+ * Grammar definitions for jsonpath datatype
+ *
+ * Transforms tokenized jsonpath into tree of JsonPathParseItem structs.
+ *
+ * Copyright (c) 2019-2022, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/jsonpath_gram.y
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "catalog/pg_collation.h"
+#include "fmgr.h"
+#include "miscadmin.h"
+#include "nodes/pg_list.h"
+#include "regex/regex.h"
+#include "utils/builtins.h"
+#include "utils/jsonpath.h"
+
+/* struct JsonPathString is shared between scan and gram */
+typedef struct JsonPathString
+{
+ char *val;
+ int len;
+ int total;
+} JsonPathString;
+
+union YYSTYPE;
+
+/* flex 2.5.4 doesn't bother with a decl for this */
+int jsonpath_yylex(union YYSTYPE *yylval_param);
+int jsonpath_yyparse(JsonPathParseResult **result);
+void jsonpath_yyerror(JsonPathParseResult **result, const char *message);
+
+static JsonPathParseItem *makeItemType(JsonPathItemType type);
+static JsonPathParseItem *makeItemString(JsonPathString *s);
+static JsonPathParseItem *makeItemVariable(JsonPathString *s);
+static JsonPathParseItem *makeItemKey(JsonPathString *s);
+static JsonPathParseItem *makeItemNumeric(JsonPathString *s);
+static JsonPathParseItem *makeItemBool(bool val);
+static JsonPathParseItem *makeItemBinary(JsonPathItemType type,
+ JsonPathParseItem *la,
+ JsonPathParseItem *ra);
+static JsonPathParseItem *makeItemUnary(JsonPathItemType type,
+ JsonPathParseItem *a);
+static JsonPathParseItem *makeItemList(List *list);
+static JsonPathParseItem *makeIndexArray(List *list);
+static JsonPathParseItem *makeAny(int first, int last);
+static JsonPathParseItem *makeItemLikeRegex(JsonPathParseItem *expr,
+ JsonPathString *pattern,
+ JsonPathString *flags);
+
+/*
+ * Bison doesn't allocate anything that needs to live across parser calls,
+ * so we can easily have it use palloc instead of malloc. This prevents
+ * memory leaks if we error out during parsing. Note this only works with
+ * bison >= 2.0. However, in bison 1.875 the default is to use alloca()
+ * if possible, so there's not really much problem anyhow, at least if
+ * you're building with gcc.
+ */
+#define YYMALLOC palloc
+#define YYFREE pfree
+
+%}
+
+/* BISON Declarations */
+%pure-parser
+%expect 0
+%name-prefix="jsonpath_yy"
+%parse-param {JsonPathParseResult **result}
+
+%union
+{
+ JsonPathString str;
+ List *elems; /* list of JsonPathParseItem */
+ List *indexs; /* list of integers */
+ JsonPathParseItem *value;
+ JsonPathParseResult *result;
+ JsonPathItemType optype;
+ bool boolean;
+ int integer;
+}
+
+%token <str> TO_P NULL_P TRUE_P FALSE_P IS_P UNKNOWN_P EXISTS_P
+%token <str> IDENT_P STRING_P NUMERIC_P INT_P VARIABLE_P
+%token <str> OR_P AND_P NOT_P
+%token <str> LESS_P LESSEQUAL_P EQUAL_P NOTEQUAL_P GREATEREQUAL_P GREATER_P
+%token <str> ANY_P STRICT_P LAX_P LAST_P STARTS_P WITH_P LIKE_REGEX_P FLAG_P
+%token <str> ABS_P SIZE_P TYPE_P FLOOR_P DOUBLE_P CEILING_P KEYVALUE_P
+%token <str> DATETIME_P
+
+%type <result> result
+
+%type <value> scalar_value path_primary expr array_accessor
+ any_path accessor_op key predicate delimited_predicate
+ index_elem starts_with_initial expr_or_predicate
+ datetime_template opt_datetime_template
+
+%type <elems> accessor_expr
+
+%type <indexs> index_list
+
+%type <optype> comp_op method
+
+%type <boolean> mode
+
+%type <str> key_name
+
+%type <integer> any_level
+
+%left OR_P
+%left AND_P
+%right NOT_P
+%left '+' '-'
+%left '*' '/' '%'
+%left UMINUS
+%nonassoc '(' ')'
+
+/* Grammar follows */
+%%
+
+result:
+ mode expr_or_predicate {
+ *result = palloc(sizeof(JsonPathParseResult));
+ (*result)->expr = $2;
+ (*result)->lax = $1;
+ (void) yynerrs;
+ }
+ | /* EMPTY */ { *result = NULL; }
+ ;
+
+expr_or_predicate:
+ expr { $$ = $1; }
+ | predicate { $$ = $1; }
+ ;
+
+mode:
+ STRICT_P { $$ = false; }
+ | LAX_P { $$ = true; }
+ | /* EMPTY */ { $$ = true; }
+ ;
+
+scalar_value:
+ STRING_P { $$ = makeItemString(&$1); }
+ | NULL_P { $$ = makeItemString(NULL); }
+ | TRUE_P { $$ = makeItemBool(true); }
+ | FALSE_P { $$ = makeItemBool(false); }
+ | NUMERIC_P { $$ = makeItemNumeric(&$1); }
+ | INT_P { $$ = makeItemNumeric(&$1); }
+ | VARIABLE_P { $$ = makeItemVariable(&$1); }
+ ;
+
+comp_op:
+ EQUAL_P { $$ = jpiEqual; }
+ | NOTEQUAL_P { $$ = jpiNotEqual; }
+ | LESS_P { $$ = jpiLess; }
+ | GREATER_P { $$ = jpiGreater; }
+ | LESSEQUAL_P { $$ = jpiLessOrEqual; }
+ | GREATEREQUAL_P { $$ = jpiGreaterOrEqual; }
+ ;
+
+delimited_predicate:
+ '(' predicate ')' { $$ = $2; }
+ | EXISTS_P '(' expr ')' { $$ = makeItemUnary(jpiExists, $3); }
+ ;
+
+predicate:
+ delimited_predicate { $$ = $1; }
+ | expr comp_op expr { $$ = makeItemBinary($2, $1, $3); }
+ | predicate AND_P predicate { $$ = makeItemBinary(jpiAnd, $1, $3); }
+ | predicate OR_P predicate { $$ = makeItemBinary(jpiOr, $1, $3); }
+ | NOT_P delimited_predicate { $$ = makeItemUnary(jpiNot, $2); }
+ | '(' predicate ')' IS_P UNKNOWN_P
+ { $$ = makeItemUnary(jpiIsUnknown, $2); }
+ | expr STARTS_P WITH_P starts_with_initial
+ { $$ = makeItemBinary(jpiStartsWith, $1, $4); }
+ | expr LIKE_REGEX_P STRING_P { $$ = makeItemLikeRegex($1, &$3, NULL); }
+ | expr LIKE_REGEX_P STRING_P FLAG_P STRING_P
+ { $$ = makeItemLikeRegex($1, &$3, &$5); }
+ ;
+
+starts_with_initial:
+ STRING_P { $$ = makeItemString(&$1); }
+ | VARIABLE_P { $$ = makeItemVariable(&$1); }
+ ;
+
+path_primary:
+ scalar_value { $$ = $1; }
+ | '$' { $$ = makeItemType(jpiRoot); }
+ | '@' { $$ = makeItemType(jpiCurrent); }
+ | LAST_P { $$ = makeItemType(jpiLast); }
+ ;
+
+accessor_expr:
+ path_primary { $$ = list_make1($1); }
+ | '(' expr ')' accessor_op { $$ = list_make2($2, $4); }
+ | '(' predicate ')' accessor_op { $$ = list_make2($2, $4); }
+ | accessor_expr accessor_op { $$ = lappend($1, $2); }
+ ;
+
+expr:
+ accessor_expr { $$ = makeItemList($1); }
+ | '(' expr ')' { $$ = $2; }
+ | '+' expr %prec UMINUS { $$ = makeItemUnary(jpiPlus, $2); }
+ | '-' expr %prec UMINUS { $$ = makeItemUnary(jpiMinus, $2); }
+ | expr '+' expr { $$ = makeItemBinary(jpiAdd, $1, $3); }
+ | expr '-' expr { $$ = makeItemBinary(jpiSub, $1, $3); }
+ | expr '*' expr { $$ = makeItemBinary(jpiMul, $1, $3); }
+ | expr '/' expr { $$ = makeItemBinary(jpiDiv, $1, $3); }
+ | expr '%' expr { $$ = makeItemBinary(jpiMod, $1, $3); }
+ ;
+
+index_elem:
+ expr { $$ = makeItemBinary(jpiSubscript, $1, NULL); }
+ | expr TO_P expr { $$ = makeItemBinary(jpiSubscript, $1, $3); }
+ ;
+
+index_list:
+ index_elem { $$ = list_make1($1); }
+ | index_list ',' index_elem { $$ = lappend($1, $3); }
+ ;
+
+array_accessor:
+ '[' '*' ']' { $$ = makeItemType(jpiAnyArray); }
+ | '[' index_list ']' { $$ = makeIndexArray($2); }
+ ;
+
+any_level:
+ INT_P { $$ = pg_strtoint32($1.val); }
+ | LAST_P { $$ = -1; }
+ ;
+
+any_path:
+ ANY_P { $$ = makeAny(0, -1); }
+ | ANY_P '{' any_level '}' { $$ = makeAny($3, $3); }
+ | ANY_P '{' any_level TO_P any_level '}'
+ { $$ = makeAny($3, $5); }
+ ;
+
+accessor_op:
+ '.' key { $$ = $2; }
+ | '.' '*' { $$ = makeItemType(jpiAnyKey); }
+ | array_accessor { $$ = $1; }
+ | '.' any_path { $$ = $2; }
+ | '.' method '(' ')' { $$ = makeItemType($2); }
+ | '.' DATETIME_P '(' opt_datetime_template ')'
+ { $$ = makeItemUnary(jpiDatetime, $4); }
+ | '?' '(' predicate ')' { $$ = makeItemUnary(jpiFilter, $3); }
+ ;
+
+datetime_template:
+ STRING_P { $$ = makeItemString(&$1); }
+ ;
+
+opt_datetime_template:
+ datetime_template { $$ = $1; }
+ | /* EMPTY */ { $$ = NULL; }
+ ;
+
+key:
+ key_name { $$ = makeItemKey(&$1); }
+ ;
+
+key_name:
+ IDENT_P
+ | STRING_P
+ | TO_P
+ | NULL_P
+ | TRUE_P
+ | FALSE_P
+ | IS_P
+ | UNKNOWN_P
+ | EXISTS_P
+ | STRICT_P
+ | LAX_P
+ | ABS_P
+ | SIZE_P
+ | TYPE_P
+ | FLOOR_P
+ | DOUBLE_P
+ | CEILING_P
+ | DATETIME_P
+ | KEYVALUE_P
+ | LAST_P
+ | STARTS_P
+ | WITH_P
+ | LIKE_REGEX_P
+ | FLAG_P
+ ;
+
+method:
+ ABS_P { $$ = jpiAbs; }
+ | SIZE_P { $$ = jpiSize; }
+ | TYPE_P { $$ = jpiType; }
+ | FLOOR_P { $$ = jpiFloor; }
+ | DOUBLE_P { $$ = jpiDouble; }
+ | CEILING_P { $$ = jpiCeiling; }
+ | KEYVALUE_P { $$ = jpiKeyValue; }
+ ;
+%%
+
+/*
+ * The helper functions below allocate and fill JsonPathParseItem's of various
+ * types.
+ */
+
+static JsonPathParseItem *
+makeItemType(JsonPathItemType type)
+{
+ JsonPathParseItem *v = palloc(sizeof(*v));
+
+ CHECK_FOR_INTERRUPTS();
+
+ v->type = type;
+ v->next = NULL;
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemString(JsonPathString *s)
+{
+ JsonPathParseItem *v;
+
+ if (s == NULL)
+ {
+ v = makeItemType(jpiNull);
+ }
+ else
+ {
+ v = makeItemType(jpiString);
+ v->value.string.val = s->val;
+ v->value.string.len = s->len;
+ }
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemVariable(JsonPathString *s)
+{
+ JsonPathParseItem *v;
+
+ v = makeItemType(jpiVariable);
+ v->value.string.val = s->val;
+ v->value.string.len = s->len;
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemKey(JsonPathString *s)
+{
+ JsonPathParseItem *v;
+
+ v = makeItemString(s);
+ v->type = jpiKey;
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemNumeric(JsonPathString *s)
+{
+ JsonPathParseItem *v;
+
+ v = makeItemType(jpiNumeric);
+ v->value.numeric =
+ DatumGetNumeric(DirectFunctionCall3(numeric_in,
+ CStringGetDatum(s->val),
+ ObjectIdGetDatum(InvalidOid),
+ Int32GetDatum(-1)));
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemBool(bool val)
+{
+ JsonPathParseItem *v = makeItemType(jpiBool);
+
+ v->value.boolean = val;
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemBinary(JsonPathItemType type, JsonPathParseItem *la, JsonPathParseItem *ra)
+{
+ JsonPathParseItem *v = makeItemType(type);
+
+ v->value.args.left = la;
+ v->value.args.right = ra;
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemUnary(JsonPathItemType type, JsonPathParseItem *a)
+{
+ JsonPathParseItem *v;
+
+ if (type == jpiPlus && a->type == jpiNumeric && !a->next)
+ return a;
+
+ if (type == jpiMinus && a->type == jpiNumeric && !a->next)
+ {
+ v = makeItemType(jpiNumeric);
+ v->value.numeric =
+ DatumGetNumeric(DirectFunctionCall1(numeric_uminus,
+ NumericGetDatum(a->value.numeric)));
+ return v;
+ }
+
+ v = makeItemType(type);
+
+ v->value.arg = a;
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemList(List *list)
+{
+ JsonPathParseItem *head,
+ *end;
+ ListCell *cell;
+
+ head = end = (JsonPathParseItem *) linitial(list);
+
+ if (list_length(list) == 1)
+ return head;
+
+ /* append items to the end of already existing list */
+ while (end->next)
+ end = end->next;
+
+ for_each_from(cell, list, 1)
+ {
+ JsonPathParseItem *c = (JsonPathParseItem *) lfirst(cell);
+
+ end->next = c;
+ end = c;
+ }
+
+ return head;
+}
+
+static JsonPathParseItem *
+makeIndexArray(List *list)
+{
+ JsonPathParseItem *v = makeItemType(jpiIndexArray);
+ ListCell *cell;
+ int i = 0;
+
+ Assert(list_length(list) > 0);
+ v->value.array.nelems = list_length(list);
+
+ v->value.array.elems = palloc(sizeof(v->value.array.elems[0]) *
+ v->value.array.nelems);
+
+ foreach(cell, list)
+ {
+ JsonPathParseItem *jpi = lfirst(cell);
+
+ Assert(jpi->type == jpiSubscript);
+
+ v->value.array.elems[i].from = jpi->value.args.left;
+ v->value.array.elems[i++].to = jpi->value.args.right;
+ }
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeAny(int first, int last)
+{
+ JsonPathParseItem *v = makeItemType(jpiAny);
+
+ v->value.anybounds.first = (first >= 0) ? first : PG_UINT32_MAX;
+ v->value.anybounds.last = (last >= 0) ? last : PG_UINT32_MAX;
+
+ return v;
+}
+
+static JsonPathParseItem *
+makeItemLikeRegex(JsonPathParseItem *expr, JsonPathString *pattern,
+ JsonPathString *flags)
+{
+ JsonPathParseItem *v = makeItemType(jpiLikeRegex);
+ int i;
+ int cflags;
+
+ v->value.like_regex.expr = expr;
+ v->value.like_regex.pattern = pattern->val;
+ v->value.like_regex.patternlen = pattern->len;
+
+ /* Parse the flags string, convert to bitmask. Duplicate flags are OK. */
+ v->value.like_regex.flags = 0;
+ for (i = 0; flags && i < flags->len; i++)
+ {
+ switch (flags->val[i])
+ {
+ case 'i':
+ v->value.like_regex.flags |= JSP_REGEX_ICASE;
+ break;
+ case 's':
+ v->value.like_regex.flags |= JSP_REGEX_DOTALL;
+ break;
+ case 'm':
+ v->value.like_regex.flags |= JSP_REGEX_MLINE;
+ break;
+ case 'x':
+ v->value.like_regex.flags |= JSP_REGEX_WSPACE;
+ break;
+ case 'q':
+ v->value.like_regex.flags |= JSP_REGEX_QUOTE;
+ break;
+ default:
+ ereport(ERROR,
+ (errcode(ERRCODE_SYNTAX_ERROR),
+ errmsg("invalid input syntax for type %s", "jsonpath"),
+ errdetail("Unrecognized flag character \"%.*s\" in LIKE_REGEX predicate.",
+ pg_mblen(flags->val + i), flags->val + i)));
+ break;
+ }
+ }
+
+ /* Convert flags to what RE_compile_and_cache needs */
+ cflags = jspConvertRegexFlags(v->value.like_regex.flags);
+
+ /* check regex validity */
+ (void) RE_compile_and_cache(cstring_to_text_with_len(pattern->val,
+ pattern->len),
+ cflags, DEFAULT_COLLATION_OID);
+
+ return v;
+}
+
+/*
+ * Convert from XQuery regex flags to those recognized by our regex library.
+ */
+int
+jspConvertRegexFlags(uint32 xflags)
+{
+ /* By default, XQuery is very nearly the same as Spencer's AREs */
+ int cflags = REG_ADVANCED;
+
+ /* Ignore-case means the same thing, too, modulo locale issues */
+ if (xflags & JSP_REGEX_ICASE)
+ cflags |= REG_ICASE;
+
+ /* Per XQuery spec, if 'q' is specified then 'm', 's', 'x' are ignored */
+ if (xflags & JSP_REGEX_QUOTE)
+ {
+ cflags &= ~REG_ADVANCED;
+ cflags |= REG_QUOTE;
+ }
+ else
+ {
+ /* Note that dotall mode is the default in POSIX */
+ if (!(xflags & JSP_REGEX_DOTALL))
+ cflags |= REG_NLSTOP;
+ if (xflags & JSP_REGEX_MLINE)
+ cflags |= REG_NLANCH;
+
+ /*
+ * XQuery's 'x' mode is related to Spencer's expanded mode, but it's
+ * not really enough alike to justify treating JSP_REGEX_WSPACE as
+ * REG_EXPANDED. For now we treat 'x' as unimplemented; perhaps in
+ * future we'll modify the regex library to have an option for
+ * XQuery-style ignore-whitespace mode.
+ */
+ if (xflags & JSP_REGEX_WSPACE)
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("XQuery \"x\" flag (expanded regular expressions) is not implemented")));
+ }
+
+ /*
+ * We'll never need sub-match details at execution. While
+ * RE_compile_and_execute would set this flag anyway, force it on here to
+ * ensure that the regex cache entries created by makeItemLikeRegex are
+ * useful.
+ */
+ cflags |= REG_NOSUB;
+
+ return cflags;
+}
+
+/*
+ * jsonpath_scan.l is compiled as part of jsonpath_gram.y. Currently, this is
+ * unavoidable because jsonpath_gram does not create a .h file to export its
+ * token symbols. If these files ever grow large enough to be worth compiling
+ * separately, that could be fixed; but for now it seems like useless
+ * complication.
+ */
+
+#include "jsonpath_scan.c"