DocWire SDK
DocWire SDK: Award-winning modern data processing in C++20. SourceForge Community Choice & Microsoft support. AI-driven processing. Supports nearly 100 data formats, including email boxes and OCR. Boost efficiency in text extraction, web data extraction, data mining, document analysis. Offline processing possible for security and confidentiality
xml_reader.h
1 /*********************************************************************************************************************************************/
2 /* DocWire SDK: Award-winning modern data processing in C++20. SourceForge Community Choice & Microsoft support. AI-driven processing. */
3 /* Supports nearly 100 data formats, including email boxes and OCR. Boost efficiency in text extraction, web data extraction, data mining, */
4 /* document analysis. Offline processing possible for security and confidentiality */
5 /* */
6 /* Copyright (c) SILVERCODERS Ltd, http://silvercoders.com */
7 /* Project homepage: https://github.com/docwire/docwire */
8 /* */
9 /* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-DocWire-Commercial */
10 /*********************************************************************************************************************************************/
11 
12 #ifndef DOCWIRE_XML_READER_H
13 #define DOCWIRE_XML_READER_H
14 
15 #include "safety_policy.h"
16 #include "pimpl.h"
17 #include <string_view>
18 #include "ranged.h"
19 #include "xml_export.h"
20 
21 namespace docwire::xml
22 {
/// Options for handling blank nodes in the XML reader.
/// Passed to the reader constructor, where the default is `keep`.
26 enum class reader_blanks { keep, ignore };
/// Represents the type of an XML node.
/// Every enumerator has an explicit numeric value (0..17); keep them stable.
/// NOTE(review): the values appear to mirror libxml2's xmlReaderTypes
/// numbering - confirm against the implementation before relying on casts.
30 enum class node_type
31 {
32  none = 0,
33  element = 1,
34  attribute = 2,
35  text = 3,
36  cdata = 4,
37  entity_reference = 5,
38  entity = 6,
39  processing_instruction = 7,
40  comment = 8,
41  document = 9,
42  document_type = 10,
43  document_fragment = 11,
44  notation = 12,
45  whitespace = 13,
46  significant_whitespace = 14,
47  end_element = 15,
48  end_entity = 16,
49  xml_declaration = 17
50 };
51 
/// A forward-only, non-cached XML reader.
///
/// @tparam safety_level Safety policy for this reader; defaults to
///         default_safety_level. It parameterises the pimpl base, the
///         return type of depth(), and the noexcept specification of
///         move_to_element().
///
/// All member functions below are declared const, including the ones that
/// reposition the reader.
/// NOTE(review): the traversal state presumably lives behind the pimpl -
/// confirm in the implementation file.
62 template <safety_policy safety_level = default_safety_level>
63 class DOCWIRE_XML_EXPORT reader : public with_pimpl<reader<safety_level>>
64 {
66 public:
    /// Constructs a reader from a string view.
    /// @param xml_sv        XML text to read.
    /// @param blanks_option How blank nodes are handled; defaults to keep.
    /// NOTE(review): xml_sv is a non-owning view; whether the reader copies
    /// the data is not visible here - confirm the required buffer lifetime.
72  explicit reader(std::string_view xml_sv, reader_blanks blanks_option = reader_blanks::keep);
73 
74  // Public low-level methods
    /// Advances the reader to the next node.
    /// NOTE(review): the meaning of the bool result is not documented in
    /// this listing - presumably false when no further node is available.
79  bool read_next() const;
    /// Returns the content of the current node (e.g., text inside an element).
83  std::string_view content() const;
    /// Returns the local name of the current node.
87  std::string_view name() const;
    /// Returns the full name (including namespace) of the current node.
91  std::string_view full_name() const;
    /// Returns the string value of the current node (concatenated text of children).
95  std::string_view string_value() const;
96  // Attribute traversal methods
    // NOTE(review): this page's member summaries also list
    // `bool move_to_first_attribute() const` and
    // `bool move_to_next_attribute() const`, but their declarations are not
    // visible in this listing (the listing numbering jumps from 96 to 111).
    // Verify they are present in the real header.
    /// Moves the reader back to the element containing the attributes.
    /// Declared noexcept only when safety_level == relaxed.
111  void move_to_element() const noexcept(safety_level == relaxed);
112 
    /// NOTE(review): undocumented in this listing - presumably the nesting
    /// depth of the current node. The result is wrapped in
    /// non_negative<int, safety_level>.
116  non_negative<int, safety_level> depth() const;
117 
    /// NOTE(review): undocumented in this listing - presumably the
    /// node_type of the current node.
121  node_type type() const;
122 };
123 }
124 #endif // DOCWIRE_XML_READER_H
A wrapper for numeric types that enforces a range [Min, Max].
Definition: ranged.h:46
A forward-only, non-cached XML reader.
Definition: xml_reader.h:64
std::string_view name() const
Returns the local name of the current node.
bool move_to_first_attribute() const
Moves the reader to the first attribute of the current element.
std::string_view string_value() const
Returns the string value of the current node (concatenated text of children).
bool read_next() const
Advances the reader to the next node.
reader(std::string_view xml_sv, reader_blanks blanks_option=reader_blanks::keep)
Constructs a reader from a string view.
bool move_to_next_attribute() const
Moves the reader to the next attribute.
std::string_view content() const
Returns the content of the current node (e.g., text inside an element).
std::string_view full_name() const
Returns the full name (including namespace) of the current node.
void move_to_element() const noexcept(safety_level==relaxed)
Moves the reader back to the element containing the attributes.
XML processing utilities.
node_type
Represents the type of an XML node.
Definition: xml_reader.h:31
reader_blanks
Options for handling blank nodes in the XML reader.
Definition: xml_reader.h:26