1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
|
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#ifndef ORCUS_PARSER_GLOBAL_HPP
#define ORCUS_PARSER_GLOBAL_HPP
#include "env.hpp"
#include <sstream>
namespace orcus {
class cell_buffer;
/**
* Classification of a character that occurs immediately after the escape
* character '\' in a string. This is the return type of
* get_string_escape_char_type().
*/
enum class string_escape_char_t
{
// NOTE(review): presumably the character does not form a legal escape
// sequence - confirm against the implementation.
invalid,
// NOTE(review): presumably the character forms a legal escape sequence that
// stands for an ordinary (non-control) character - confirm.
valid,
// NOTE(review): presumably the character forms a legal escape sequence that
// stands for a control character - confirm.
control_char
};
/**
* Stores state of string parsing. Upon successful parsing the str points
* to the first character of the string and the length stores the size of
* the string. When the parsing fails, the str value becomes nullptr and
* the length stores the error code.
*/
struct parse_quoted_string_state
{
/**
* NOTE(review): presumably the error code stored in length when no closing
* quote is found; the value is defined outside this header - confirm.
*/
ORCUS_PSR_DLLPUBLIC static const size_t error_no_closing_quote;
/**
* NOTE(review): presumably the error code stored in length when an illegal
* escape character is encountered; the value is defined outside this
* header - confirm.
*/
ORCUS_PSR_DLLPUBLIC static const size_t error_illegal_escape_char;
/**
* On success, points to the first character of the parsed string. On
* failure, it is nullptr.
*/
const char* str;
/**
* On success, the size of the parsed string. On failure, the error code.
*/
size_t length;
/**
* When true, the str pointer points to the temporary buffer storage
* provided by the caller instead of the original character stream. The
* caller must allocate memory and copy the value to it before the buffer
* content changes if the parsed string value needs to be stored.
*
* When false, str points to a position in the original stream, and the
* caller doesn't need to allocate memory to store the string value as
* long as the original character stream is alive.
*/
bool transient;
/**
* When true, the string contains at least one control character - a
* character whose value ranges between 0x00 and 0x1F.
*/
bool has_control_character;
};
/**
* Check if the character is a blank character.
*
* NOTE(review): the exact set of characters treated as blank is defined in
* the implementation and is not visible in this header - confirm.
*
* @param c character to check.
*/
ORCUS_PSR_DLLPUBLIC bool is_blank(char c);
/**
* Check if the character is an alphabetic character.
*
* NOTE(review): presumably limited to ASCII letters and independent of the
* current locale - confirm against the implementation.
*
* @param c character to check.
*/
ORCUS_PSR_DLLPUBLIC bool is_alpha(char c);
/**
* Check if the character is a numeric character.
*
* NOTE(review): presumably true only for the decimal digits '0' to '9' -
* confirm against the implementation.
*
* @param c character to check.
*/
ORCUS_PSR_DLLPUBLIC bool is_numeric(char c);
/**
* Check if the character is one of the allowed characters. Note that you can
* only specify up to 16 allowed characters.
*
* @param c character to check.
* @param allowed string containing all allowed characters.
*
* @return true if the character is one of the allowed characters, false
* otherwise.
*/
ORCUS_PSR_DLLPUBLIC bool is_in(char c, std::string_view allowed);
/**
* Parse a sequence of characters into a double-precision numeric value.
*
* @param p pointer to the first character to start parsing from.
* @param p_end pointer to the end of the parsable range, i.e. the first
* character that must not be parsed.
* @param value output parameter to assign the matched value to.
*
* @return pointer to the first non-matching character.
*
* NOTE(review): what is assigned to value when no character matches is not
* specified in this header - confirm against the implementation.
*/
ORCUS_PSR_DLLPUBLIC const char* parse_numeric(const char* p, const char* p_end, double& value);
/**
* Parse a sequence of characters into an integer value.
*
* @param p pointer to the first character to start parsing from.
* @param p_end pointer to the end of the parsable range, i.e. the first
* character that must not be parsed.
* @param value output parameter to assign the matched value to.
*
* @return pointer to the first non-matching character.
*
* @note Use of this function should be eventually replaced with
* std::from_chars() (declared in <charconv>, C++17) once it becomes
* available.
*/
ORCUS_PSR_DLLPUBLIC const char* parse_integer(const char* p, const char* p_end, long& value);
/**
* Parse a single-quoted string. Two single-quote characters ('') represent
* one single-quote character.
*
* @param p position in the character stream to parse from; it is passed by
* reference. NOTE(review): presumably it should point to the opening
* quote and is moved forward as the string is parsed - confirm.
* @param max_length maximum length to parse.
* @param buffer temporary buffer storage provided by the caller. The
* returned state may point into it, in which case its
* transient flag is true.
*
* @return state of the string parsing; see parse_quoted_string_state for
* how success and failure are encoded.
*/
ORCUS_PSR_DLLPUBLIC parse_quoted_string_state parse_single_quoted_string(
const char*& p, size_t max_length, cell_buffer& buffer);
/**
* Starting from the opening single quote position, parse the string all the
* way to the closing quote. Two single-quote characters ('') will be
* interpreted as one encoded single-quote character.
*
* @param p it should point to the opening single quote character.
* @param max_length maximum length to parse.
*
* @return address of the character immediately after the closing quote, or
* nullptr in case no closing quote is found.
*/
ORCUS_PSR_DLLPUBLIC const char* parse_to_closing_single_quote(
const char* p, size_t max_length);
/**
* Parse a double-quoted string.
*
* NOTE(review): the escape rules are not visible in this header; presumably
* characters escaped with '\' are handled as classified by
* get_string_escape_char_type() - confirm against the implementation.
*
* @param p position in the character stream to parse from; it is passed by
* reference. NOTE(review): presumably it should point to the opening
* quote and is moved forward as the string is parsed - confirm.
* @param max_length maximum length to parse.
* @param buffer temporary buffer storage provided by the caller. The
* returned state may point into it, in which case its
* transient flag is true.
*
* @return state of the string parsing; see parse_quoted_string_state for
* how success and failure are encoded.
*/
ORCUS_PSR_DLLPUBLIC parse_quoted_string_state parse_double_quoted_string(
const char*& p, size_t max_length, cell_buffer& buffer);
/**
* Starting from the opening double quote position, parse the string all the
* way to the closing quote.
*
* NOTE(review): this comment previously described doubled single-quote
* characters (''), which appears to have been copied from
* parse_to_closing_single_quote(). The escape handling for double-quoted
* strings is not visible in this header; presumably '\' acts as the escape
* character - confirm against the implementation.
*
* @param p it should point to the opening double quote character.
* @param max_length maximum length to parse.
*
* @return address of the character immediately after the closing quote, or
* nullptr in case no closing quote is found.
*/
ORCUS_PSR_DLLPUBLIC const char* parse_to_closing_double_quote(
const char* p, size_t max_length);
/**
* Given a character that occurs immediately after the escape character '\',
* return what type of escape character it is.
*
* @param c character that occurs immediately after the escape character
* '\'.
*
* @return enum value representing the type of escape character.
*/
ORCUS_PSR_DLLPUBLIC string_escape_char_t get_string_escape_char_type(char c);
/**
* Trim a string.
*
* NOTE(review): presumably removes leading and trailing blank characters;
* the exact set of characters removed is not visible in this header -
* confirm against the implementation.
*
* @param str string to trim.
*
* @return trimmed string as a std::string_view. NOTE(review): presumably it
* refers to the same underlying character storage as str, so that
* storage would have to outlive the returned view - confirm.
*/
ORCUS_PSR_DLLPUBLIC std::string_view trim(std::string_view str);
}
#endif
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|