1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
|
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#ifndef INCLUDED_ORCUS_PARSER_BASE_HPP
#define INCLUDED_ORCUS_PARSER_BASE_HPP
#include "env.hpp"
#include "exception.hpp"
#include <string>
#include <string_view>
#include <cstdlib>
#include <cstddef>
#include <cassert>
#include <functional>
namespace orcus {
class ORCUS_PSR_DLLPUBLIC parser_base
{
protected:
using numeric_parser_type = std::function<const char*(const char*, const char*, double&)>;
const char* const mp_begin;
const char* mp_char;
const char* mp_end;
private:
numeric_parser_type m_func_parse_numeric;
protected:
parser_base(const char* p, size_t n);
void set_numeric_parser(const numeric_parser_type& func)
{
m_func_parse_numeric = func;
}
bool has_char() const
{
assert(mp_char <= mp_end);
return mp_char != mp_end;
}
bool has_next() const
{
assert((mp_char+1) <= mp_end);
return (mp_char+1) != mp_end;
}
void next(size_t inc=1) { mp_char += inc; }
void prev(size_t dec=1);
char cur_char() const { return *mp_char; }
/**
* Peek a character at specified offset from the current position without
* advancing the current position.
*
* @note The caller <strong>must</strong> ensure that the specified offset
* position is a valid position. This method does not check its
* validity.
*
* @param offset offset from the current position to peek at.
*
* @return character at a specified offset position from the current
* position.
*/
char peek_char(std::size_t offset=1) const;
/**
* Peek a segment of contiguous characters of a specified length starting
* from the current position.
*
* @note The caller <strong>must</strong> ensure that the specified
* substring segment is entirely valid. This method does not check
* its validity.
*
* @param length length of the segment to peek.
*
* @return segment of contiguous characters.
*/
std::string_view peek_chars(std::size_t length) const;
/**
* Skip an optional byte order mark at the current position of the stream.
*
* Currently we only check for UTF-8 BOM.
*/
void skip_bom();
void skip(std::string_view chars_to_skip);
/**
* Skip all characters that are 0-32 in ASCII range
*/
void skip_space_and_control();
/**
* Parse and check next characters to see if it matches specified
* character sequence.
*
* @param expected sequence of characters to match against.
*
* @return true if it matches specified character sequence, false
* otherwise.
*/
bool parse_expected(std::string_view expected);
/**
* Try to parse the next characters as double, or return NaN in case of
* failure.
*
* @return double value on success, or NaN on failure.
*/
double parse_double();
/**
* Determine the number of characters remaining <strong>after</strong> the
* current character. For instance, if the current character is on the
* last character in the stream, this method will return 0, whereas if
* it's on the first character, it will return the total length - 1.
*
* @return number of characters remaining after the current character.
*/
size_t remaining_size() const;
/**
* Determine the number of characters available from the current character
* to the end of the buffer. The current character is included.
*
* @return number of characters available including the current character.
*/
size_t available_size() const
{
return std::distance(mp_char, mp_end);
}
/**
* Return the current offset from the beginning of the character stream.
*
* @return current offset from the beginning of the character stream.
*/
std::ptrdiff_t offset() const;
};
}
#endif
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|