summaryrefslogtreecommitdiffstats
path: root/include/orcus/yaml_parser_base.hpp
blob: 13b4c915658a6dc886f977faf691fe91bc5cab8b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

#ifndef INCLUDED_ORCUS_YAML_PARSER_BASE_HPP
#define INCLUDED_ORCUS_YAML_PARSER_BASE_HPP

#include "orcus/parser_base.hpp"

#include <memory>
#include <cassert>

namespace orcus { namespace yaml {

namespace detail {

/**
 * Kind of a parser scope.  This is the type read and written through
 * parser_base::get_scope_type() and parser_base::set_scope_type().
 */
enum class scope_t
{
    // NOTE(review): presumably the kind of a scope whose type has not been
    // determined yet - confirm against the implementation.
    unset,
    sequence,
    map,
    multi_line_string
};

/**
 * Keyword classification; this is the return type of
 * parser_base::parse_keyword().
 */
enum class keyword_t
{
    // NOTE(review): presumably returned when the text is not one of the
    // keywords listed below - confirm against parse_keyword().
    unknown,
    boolean_true,
    boolean_false,
    null
};

/**
 * Parse token type.  Values of this type are passed to
 * parser_base::push_parse_token() and returned from
 * parser_base::get_last_parse_token().
 */
enum class parse_token_t
{
    unknown,

    // Handler tokens (tokens associated with handler events).

    begin_parse,
    end_parse,
    begin_document,
    end_document,
    begin_sequence,
    end_sequence,
    begin_map,
    end_map,
    begin_map_key,
    end_map_key,
    string,
    number,
    boolean_true,
    boolean_false,
    null,

    // Non-handler tokens (no associated handler event).

    begin_sequence_element
};

}

/**
 * Base class for YAML parsers, layered on top of the generic
 * ::orcus::parser_base.
 *
 * Every member below the private implementation pointer is protected, the
 * constructor included, so this class can only be instantiated and used
 * through a derived class.  Only declarations are given here; the member
 * functions and the impl struct are defined elsewhere.
 */
class ORCUS_PSR_DLLPUBLIC parser_base : public ::orcus::parser_base
{
    // Private implementation type; only forward-declared in this header.
    struct impl;
    std::unique_ptr<impl> mp_impl;

protected:

    // The entire line is empty.
    // NOTE(review): presumably a sentinel value returned by parse_indent() -
    // confirm against the implementation.
    static const size_t parse_indent_blank_line;

    // End of stream has been reached while parsing in the indent part of a
    // line.
    // NOTE(review): presumably a sentinel value returned by parse_indent() -
    // confirm against the implementation.
    static const size_t parse_indent_end_of_stream;

    // NOTE(review): presumably the scope width reported when no scope is
    // present - confirm against get_scope() and pop_scope().
    static const size_t scope_empty;

    /**
     * Key and value as a pair of non-owning string views.  This is the
     * return type of parse_key_value().
     */
    struct key_value
    {
        std::string_view key;
        std::string_view value;
    };

    // Not default-constructible, copy-constructible or copy-assignable.
    parser_base() = delete;
    parser_base(const parser_base&) = delete;
    parser_base& operator=(const parser_base&) = delete;

    /**
     * @param content view of the characters to parse.
     *                NOTE(review): only a view is taken, so the underlying
     *                buffer presumably has to outlive the parser - confirm.
     */
    parser_base(std::string_view content);

    // Declared here and defined out of line: impl is an incomplete type in
    // this header, and std::unique_ptr<impl> needs the complete type at the
    // point where it is destroyed.
    ~parser_base();

    void push_parse_token(detail::parse_token_t t);

    detail::parse_token_t get_last_parse_token() const;

    /**
     * Get the offset position of the last character of the current line
     * without comment or trailing whitespace (if present).  Call this only
     * after the current line has been parsed to the end, that is, only after
     * parse_to_end_of_line() has been called.
     *
     * @return offset position of the last character of the current line.
     */
    size_t offset_last_char_of_line() const;

    /**
     * Parse the prefix indent part of a line.
     *
     * @return number of whitespace characters encountered.
     */
    size_t parse_indent();

    /**
     * Once a non-whitespace character is reached, parse until the end of the
     * line.
     */
    std::string_view parse_to_end_of_line();

    /**
     * Upon encountering a '#', skip until either the line-feed or the
     * end-of-stream is reached.
     */
    void skip_comment();

    void reset_on_new_line();

    // Scope handling.  Scopes are identified by a width (size_t) and carry a
    // type (detail::scope_t).

    size_t get_scope() const;

    void push_scope(size_t scope_width);

    void clear_scopes();

    detail::scope_t get_scope_type() const;

    void set_scope_type(detail::scope_t type);

    /**
     * Pop the current scope and return the new scope width after the pop.
     *
     * @return new scope width after the pop.
     */
    size_t pop_scope();

    // Line buffer handling.
    // NOTE(review): push_line_back() presumably stores the n characters
    // starting at p as one buffered line - confirm against the
    // implementation.

    void push_line_back(const char* p, size_t n);

    std::string_view pop_line_front();

    bool has_line_buffer() const;

    size_t get_line_buffer_count() const;

    std::string_view merge_line_buffer();

    /**
     * Get the hash value of current document, or nullptr if a document has
     * not started.
     *
     * @return hash value of current document.
     */
    const char* get_doc_hash() const;

    /**
     * Set the hash value representing the current document.  For now the
     * memory address of the first character of the document is used as its
     * hash value.
     *
     * @param hash hash value of a document.
     */
    void set_doc_hash(const char* hash);

    // Helpers operating on a character range given as a start pointer plus a
    // length.

    detail::keyword_t parse_keyword(const char* p, size_t len);

    key_value parse_key_value(const char* p, size_t len);

    // The following three functions take p by reference, so a call may
    // modify the caller's pointer.

    std::string_view parse_single_quoted_string_value(const char*& p, size_t max_length);

    std::string_view parse_double_quoted_string_value(const char*& p, size_t max_length);

    void skip_blanks(const char*& p, size_t len);

    // Literal block and multi-line string handling.

    void start_literal_block();

    bool in_literal_block() const;

    void handle_line_in_literal(size_t indent);

    void handle_line_in_multi_line_string();
};

}}

#endif

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */