summaryrefslogtreecommitdiffstats
path: root/include/orcus/yaml_parser_base.hpp
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-15 05:48:59 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-15 05:48:59 +0000
commitc484829272cd13a738e35412498e12f2c9a194ac (patch)
treea1f5ec09629ee895bd3963fa8820b45f2f4c574b /include/orcus/yaml_parser_base.hpp
parentInitial commit. (diff)
downloadliborcus-upstream.tar.xz
liborcus-upstream.zip
Adding upstream version 0.19.2.upstream/0.19.2upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'include/orcus/yaml_parser_base.hpp')
-rw-r--r--include/orcus/yaml_parser_base.hpp195
1 files changed, 195 insertions, 0 deletions
diff --git a/include/orcus/yaml_parser_base.hpp b/include/orcus/yaml_parser_base.hpp
new file mode 100644
index 0000000..13b4c91
--- /dev/null
+++ b/include/orcus/yaml_parser_base.hpp
@@ -0,0 +1,195 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_YAML_PARSER_BASE_HPP
+#define INCLUDED_ORCUS_YAML_PARSER_BASE_HPP
+
+#include "orcus/parser_base.hpp"
+
+#include <memory>
+#include <cassert>
+
+namespace orcus { namespace yaml {
+
+namespace detail {
+
+enum class scope_t // kind of a parser scope; read/written via parser_base::get_scope_type() / set_scope_type()
+{
+ unset, // NOTE(review): presumably "type not determined yet" - confirm against the implementation
+ sequence,
+ map,
+ multi_line_string
+};
+
+enum class keyword_t // result type of parser_base::parse_keyword()
+{
+ unknown, // NOTE(review): presumably "text is not a recognized keyword" - confirm against parse_keyword()
+ boolean_true,
+ boolean_false,
+ null
+};
+
+enum class parse_token_t // token type taken by parser_base::push_parse_token() and returned by get_last_parse_token()
+{
+ unknown,
+
+ // handler tokens (tokens associated with handler events)
+
+ begin_parse,
+ end_parse,
+ begin_document,
+ end_document,
+ begin_sequence,
+ end_sequence,
+ begin_map,
+ end_map,
+ begin_map_key,
+ end_map_key,
+ string,
+ number,
+ boolean_true,
+ boolean_false,
+ null,
+
+ // non-handler tokens (tokens with no associated handler event)
+
+ begin_sequence_element
+};
+
+}
+
+class ORCUS_PSR_DLLPUBLIC parser_base : public ::orcus::parser_base // YAML-specific base; exposes only protected members, so it is usable solely through derived classes
+{
+ struct impl; // forward declaration only; the definition is not part of this header
+ std::unique_ptr<impl> mp_impl; // sole owner of the hidden implementation state
+
+protected:
+
+ // The entire line is empty. (Value is defined out of line.)
+ static const size_t parse_indent_blank_line; // NOTE(review): presumably a sentinel result of parse_indent() - confirm
+
+ // End of stream has been reached while parsing in the indent part of a line.
+ static const size_t parse_indent_end_of_stream; // NOTE(review): presumably a sentinel result of parse_indent() - confirm
+
+ static const size_t scope_empty; // NOTE(review): presumably the scope value reported when no scope is pushed - confirm
+
+ struct key_value // result type of parse_key_value()
+ {
+ std::string_view key; // non-owning view
+ std::string_view value; // non-owning view
+ };
+
+ parser_base() = delete; // no default construction
+ parser_base(const parser_base&) = delete; // not copy-constructible
+ parser_base& operator=(const parser_base&) = delete; // not copy-assignable
+
+ parser_base(std::string_view content); // content is a non-owning view; NOTE(review): presumably must outlive the parser - confirm
+ ~parser_base(); // declared here, defined out of line (impl is an incomplete type in this header)
+
+ void push_parse_token(detail::parse_token_t t); // NOTE(review): presumably records t as the most recent token - confirm
+
+ detail::parse_token_t get_last_parse_token() const;
+
+ /**
+ * Get the offset position of the last character of the current line,
+ * excluding any comment or trailing whitespace (if present). Call this only
+ * after the current line has been parsed to the end, that is, only after
+ * parse_to_end_of_line() has been called.
+ *
+ * @return offset position of the last character of the current line.
+ */
+ size_t offset_last_char_of_line() const;
+
+ /**
+ * Parse the prefix indent part of a line.
+ *
+ * @return number of whitespace characters encountered.
+ */
+ size_t parse_indent();
+
+ /**
+ * Once a non-whitespace character is reached, parse until the end of the
+ * line.
+ */
+ std::string_view parse_to_end_of_line(); // returns a non-owning view; NOTE(review): presumably the parsed line content - confirm
+
+ /**
+ * Upon encountering a '#', skip until either the line-feed or the
+ * end-of-stream is reached.
+ */
+ void skip_comment();
+
+ void reset_on_new_line(); // NOTE(review): presumably clears per-line parse state - confirm
+
+ size_t get_scope() const; // NOTE(review): presumably the width of the current scope (cf. push_scope/pop_scope) - confirm
+
+ void push_scope(size_t scope_width);
+
+ void clear_scopes();
+
+ detail::scope_t get_scope_type() const;
+
+ void set_scope_type(detail::scope_t type);
+
+ /**
+ * Pop the current scope and return the new scope width after the pop.
+ *
+ * @return new scope width after the pop.
+ */
+ size_t pop_scope();
+
+ void push_line_back(const char* p, size_t n); // NOTE(review): presumably appends the n characters at p to the line buffer - confirm
+
+ std::string_view pop_line_front(); // NOTE(review): presumably removes and returns the oldest buffered line - confirm
+
+ bool has_line_buffer() const;
+
+ size_t get_line_buffer_count() const;
+
+ std::string_view merge_line_buffer(); // NOTE(review): presumably joins all buffered lines into one string - confirm
+
+ /**
+ * Get the hash value of current document, or nullptr if a document has
+ * not started.
+ *
+ * @return hash value of current document.
+ */
+ const char* get_doc_hash() const;
+
+ /**
+ * Set the hash value representing the current document. For now the
+ * memory address of the first character of the document is used as its
+ * hash value.
+ *
+ * @param hash hash value of a document.
+ */
+ void set_doc_hash(const char* hash);
+
+ detail::keyword_t parse_keyword(const char* p, size_t len); // input is the character range [p, p + len)
+
+ key_value parse_key_value(const char* p, size_t len); // input is the character range [p, p + len)
+
+ std::string_view parse_single_quoted_string_value(const char*& p, size_t max_length); // p is taken by reference, so the callee can reposition it
+
+ std::string_view parse_double_quoted_string_value(const char*& p, size_t max_length); // p is taken by reference, so the callee can reposition it
+
+ void skip_blanks(const char*& p, size_t len); // p is taken by reference, so the callee can reposition it
+
+ void start_literal_block();
+
+ bool in_literal_block() const;
+
+ void handle_line_in_literal(size_t indent);
+
+ void handle_line_in_multi_line_string();
+};
+
+}}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */