diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 05:48:59 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 05:48:59 +0000 |
commit | c484829272cd13a738e35412498e12f2c9a194ac (patch) | |
tree | a1f5ec09629ee895bd3963fa8820b45f2f4c574b /src/orcus_xml_main.cpp | |
parent | Initial commit. (diff) | |
download | liborcus-upstream.tar.xz liborcus-upstream.zip |
Adding upstream version 0.19.2.upstream/0.19.2upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/orcus_xml_main.cpp')
-rw-r--r-- | src/orcus_xml_main.cpp | 350 |
1 files changed, 350 insertions, 0 deletions
diff --git a/src/orcus_xml_main.cpp b/src/orcus_xml_main.cpp new file mode 100644 index 0000000..a1f38c5 --- /dev/null +++ b/src/orcus_xml_main.cpp @@ -0,0 +1,350 @@ +/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + */ + +#include "orcus/orcus_xml.hpp" +#include "orcus/xml_namespace.hpp" +#include "orcus/xml_structure_tree.hpp" +#include "orcus/dom_tree.hpp" +#include "orcus/spreadsheet/factory.hpp" +#include "orcus/spreadsheet/document.hpp" +#include "orcus/stream.hpp" +#include "orcus/sax_parser_base.hpp" + +#include "orcus_filter_global.hpp" +#include "cli_global.hpp" + +#include <cstdlib> +#include <cstring> +#include <iostream> +#include <fstream> +#include <boost/program_options.hpp> +#include <mdds/sorted_string_map.hpp> + +#include "filesystem_env.hpp" + +using namespace orcus; +using namespace std; +namespace po = boost::program_options; + +namespace { + +namespace output_mode { + +enum class type { + unknown, + dump, + map, + map_gen, + transform_xml, + structure, +}; + +using map_type = mdds::sorted_string_map<type, mdds::string_view_map_entry>; + +// Keys must be sorted. +constexpr map_type::entry entries[] = { + { "dump", type::dump }, + { "map", type::map }, + { "map-gen", type::map_gen }, + { "structure", type::structure }, + { "transform", type::transform_xml }, +}; + +const map_type& get() +{ + static const map_type mt(entries, std::size(entries), type::unknown); + return mt; +} + +} // namespace output_mode + +std::string to_string(output_mode::type t) +{ + for (const output_mode::map_type::entry& e : output_mode::entries) + if (t == e.value) + return std::string(e.key); + + return std::string(); +} + +void print_usage(ostream& os, const po::options_description& desc) +{ + os << "Usage: orcus-xml [OPTIONS] FILE" << endl << endl; + os << desc; +} + +std::string build_output_help_text() +{ + std::ostringstream os; + os << "Path to either an output directory, or an output file."; + return os.str(); +} + +std::string build_mode_help_text() +{ + std::ostringstream os; + os << "Mode of operation. Select one of the following options: "; + auto it = output_mode::entries, ite = output_mode::entries + std::size(output_mode::entries); + --ite; + + for (; it != ite; ++it) + os << std::string(it->key) << ", "; + + os << "or " << std::string(it->key) << "."; + return os.str(); +} + +std::string build_map_help_text() +{ + std::ostringstream os; + os << "Path to the map file. A map file is required for all modes except for the " + << to_string(output_mode::type::structure) << " mode."; + return os.str(); +} + +bool parse_and_dump_structure(const file_content& content, const std::string& output) +{ + xmlns_repository repo; + xmlns_context cxt = repo.create_context(); + xml_structure_tree tree(cxt); + tree.parse(content.str()); + + if (output.empty()) + { + tree.dump_compact(cout); + return true; + } + + ofstream file(output); + if (!file) + { + cerr << "failed to create output file: " << output << endl; + return false; + } + + tree.dump_compact(file); + + return true; +} + +void dump_document_structure(const file_content& content, output_stream& os) +{ + xmlns_repository repo; + xmlns_context cxt = repo.create_context(); + dom::document_tree tree(cxt); + tree.load(content.str()); + + tree.dump_compact(os.get()); +} + +} // anonymous namespace + +int main(int argc, char** argv) try +{ + po::options_description desc("Options"); + desc.add_options() + ("help,h", "Print this help.") + ("mode", po::value<std::string>(), build_mode_help_text().data()) + ("map,m", po::value<std::string>(), build_map_help_text().data()) + ("output,o", po::value<std::string>(), build_output_help_text().data()) + ("output-format,f", po::value<string>(), gen_help_output_format().data()) + ; + + po::options_description hidden(""); + hidden.add_options() + ("input", po::value<string>(), "input file"); + + po::positional_options_description po_desc; + po_desc.add("input", 1); + + po::options_description cmd_opt; + cmd_opt.add(desc).add(hidden); + + po::variables_map vm; + try + { + po::store( + po::command_line_parser(argc, argv).options(cmd_opt).positional(po_desc).run(), vm); + po::notify(vm); + } + catch (const exception& e) + { + // Unknown options. + cerr << e.what() << endl; + print_usage(cout, desc); + return EXIT_FAILURE; + } + + if (vm.count("help")) + { + print_usage(cout, desc); + return EXIT_FAILURE; + } + + if (!vm.count("input")) + { + cerr << "No input file." << endl; + print_usage(cout, desc); + return EXIT_FAILURE; + } + + if (!vm.count("mode")) + { + cerr << "Mode not specified." << endl; + print_usage(cout, desc); + return EXIT_FAILURE; + } + + std::string s = vm["mode"].as<std::string>(); + output_mode::type mode = output_mode::get().find(s); + + if (mode == output_mode::type::unknown) + { + cerr << "Unknown output mode: " << s << endl; + print_usage(cout, desc); + return EXIT_FAILURE; + } + + fs::path input_path = vm["input"].as<std::string>(); + + if (!fs::is_regular_file(input_path)) + { + cerr << input_path << " is not a valid file." << endl; + return EXIT_FAILURE; + } + + fs::path map_path; + if (vm.count("map")) + { + map_path = vm["map"].as<std::string>(); + + if (!fs::is_regular_file(map_path)) + { + cerr << map_path << " is not a valid map file." << endl; + return EXIT_FAILURE; + } + } + + std::string output; + if (vm.count("output")) + output = vm["output"].as<std::string>(); + + file_content content(input_path.string().data()); + + try + { + switch (mode) + { + case output_mode::type::structure: + { + bool success = parse_and_dump_structure(content, output); + return success ? EXIT_SUCCESS : EXIT_FAILURE; + } + case output_mode::type::dump: + { + output_stream os(vm); + dump_document_structure(content, os); + return EXIT_SUCCESS; + } + case output_mode::type::map_gen: + { + output_stream os(vm); + xmlns_repository repo; + orcus_xml app(repo, nullptr, nullptr); + app.write_map_definition(content.str(), os.get()); + return EXIT_SUCCESS; + } + default: + ; + } + + spreadsheet::range_size_t ss{1048576, 16384}; + spreadsheet::document doc{ss}; + spreadsheet::import_factory import_fact(doc); + spreadsheet::export_factory export_fact(doc); + + xmlns_repository repo; + orcus_xml app(repo, &import_fact, &export_fact); + + if (map_path.empty()) + app.detect_map_definition(content.str()); + else + { + file_content map_content(map_path.string().data()); + app.read_map_definition(map_content.str()); + } + + app.read_stream(content.str()); + + switch (mode) + { + case output_mode::type::map: + { + dump_format_t format = dump_format_t::unknown; + s.clear(); + + if (vm.count("output-format")) + { + s = vm["output-format"].as<std::string>(); + format = to_dump_format_enum(s); + } + else + { + cerr << "Output format is not specified." << endl; + print_usage(cout, desc); + return EXIT_FAILURE; + } + + if (format == dump_format_t::unknown) + { + std::cerr << "Unsupported output format: '" << s << "'" << endl; + return EXIT_FAILURE; + } + + doc.dump(format, output); + break; + } + case output_mode::type::transform_xml: + { + if (output.empty()) + { + cout << "output xml file name not provided" << endl; + print_usage(cout, desc); + return EXIT_FAILURE; + } + + ofstream file(output); + if (!file) + { + cerr << "failed to create output file: " << output << endl; + return EXIT_FAILURE; + } + + // Write transformed xml content to file. + app.write(content.str(), file); + break; + } + default: + ; + } + } + catch (const malformed_xml_error& e) + { + cerr << create_parse_error_output(content.str(), e.offset()) << endl; + cerr << e.what() << endl; + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; +} +catch (const std::exception& e) +{ + cerr << e.what() << endl; + return EXIT_FAILURE; +} + +/* vim:set shiftwidth=4 softtabstop=4 expandtab: */ |