DocWire SDK
DocWire SDK: Award-winning modern data processing in C++20. SourceForge Community Choice & Microsoft support. AI-driven processing. Supports nearly 100 data formats, including email boxes and OCR. Boost efficiency in text extraction, web data extraction, data mining, document analysis. Offline processing possible for security and confidentiality.
xml_descendants.h
1 /*********************************************************************************************************************************************/
2 /* DocWire SDK: Award-winning modern data processing in C++20. SourceForge Community Choice & Microsoft support. AI-driven processing. */
3 /* Supports nearly 100 data formats, including email boxes and OCR. Boost efficiency in text extraction, web data extraction, data mining, */
4 /* document analysis. Offline processing possible for security and confidentiality */
5 /* */
6 /* Copyright (c) SILVERCODERS Ltd, http://silvercoders.com */
7 /* Project homepage: https://github.com/docwire/docwire */
8 /* */
9 /* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-DocWire-Commercial */
10 /* *******************************************************************************************************************************************/
11 
12 #ifndef DOCWIRE_XML_DESCENDANTS_VIEW_H
13 #define DOCWIRE_XML_DESCENDANTS_VIEW_H
14 
15 #include "xml_nodes.h"
16 #include "xml_iterator_state.h"
17 #include "log_scope.h"
18 #include "xml_node_ref.h"
19 #include "not_null.h"
20 #include <memory>
21 #include <ranges>
22 
23 namespace docwire::xml
24 {
25 
// A view over all descendants of an XML node (recursive).
//
// The view stores an iterator state handle and a start depth; iteration is
// performed by the nested `iterator` class, and the end of the range is
// represented by a default-constructed `sentinel`.
//
// Template parameter `safety_level` selects the safety policy that is passed
// on to `not_null` and `iterator_state`; it defaults to `default_safety_level`.
33 template <safety_policy safety_level = default_safety_level>
34 class descendants_view : public std::ranges::view_base
35 {
36 public:
37  class iterator;
    // Returns a newly constructed iterator built from the stored state and
    // start depth.
38  iterator begin() const
39  {
40  return iterator{m_state, m_start_depth};
41  }
    // Returns the end marker: a default-constructed sentinel.
42  sentinel end() const { return {}; }
43 
    // Constructs a view from an iterator state and start depth.
    //   state - non-null shared pointer to the iterator state; moved into m_state.
    //   depth - start depth, stored in m_start_depth and handed to every
    //           iterator created by begin().
49  explicit descendants_view(not_null<std::shared_ptr<iterator_state<safety_level>>, safety_level> state, int depth)
50  : m_state(std::move(state)), m_start_depth(depth)
51  {}
52 
53 private:
    // NOTE(review): m_state is initialised by the constructor and read in
    // begin(), but its declaration is not visible in this listing (the inner
    // numbering skips line 54) - confirm against the original header.
    // Start depth passed to iterators created by begin().
55  int m_start_depth;
56 };
57 
// Iterator for recursively traversing descendant nodes.
//
// Wraps a `nodes_view<safety_level>::iterator` and reports end-of-range as soon
// as the wrapped iterator reaches a node whose depth is less than or equal to
// the start depth. Declared as an input iterator via `iterator_concept`.
//
// NOTE(review): the inner numbering of this listing skips lines 69 and 115;
// m_state is used throughout but its declaration is not visible here, and no
// value_type alias is visible either - confirm against the original header.
61 template <safety_policy safety_level>
62 class descendants_view<safety_level>::iterator final
63 {
64  friend class descendants_view;
65 
66 public:
67  using iterator_concept = std::input_iterator_tag;
68  using difference_type = std::ptrdiff_t;
70  using pointer = const node_ref<safety_level>*;
71  using reference = const node_ref<safety_level>&;
72 
    // End check: delegates to the wrapped nodes iterator's comparison with the sentinel.
73  bool operator==(const sentinel& s) const { return m_nodes_iter == s; }
    // Access to the current node: forwarded to the wrapped nodes iterator.
74  reference operator*() const { return *m_nodes_iter; }
75  pointer operator->() const { return m_nodes_iter.operator->(); }
    // Pre-increment. Advances the wrapped iterator unless the shared
    // m_node_ahead_flag is set, in which case the flag is cleared and the
    // wrapped iterator is left where it is. Afterwards the iterator is turned
    // into the end iterator if the current node is out of scope.
    // Returns *this.
76  iterator& operator++()
77  {
    // Logging scope that records the start depth for this call.
78  log::scope _{ "start_depth"_v = m_start_depth };
79 
80  if (m_state->m_node_ahead_flag)
81  {
    // Consume the flag instead of advancing.
82  m_state->m_node_ahead_flag = false;
83  }
84  else
85  {
86  ++m_nodes_iter;
87  }
88 
89  invalidate_if_out_of_scope();
90  return *this;
91  }
    // Post-increment. Performs the same step as pre-increment and returns nothing.
92  void operator++(int)
93  {
94  ++(*this);
95  }
96 
97 private:
    // If the wrapped iterator is not at the end and the current node's depth is
    // less than or equal to m_start_depth, sets the shared m_node_ahead_flag
    // and resets the wrapped iterator so that this iterator compares equal to
    // the sentinel.
98  void invalidate_if_out_of_scope()
99  {
100  if (m_nodes_iter != sentinel{} && (*m_nodes_iter).depth() <= m_start_depth)
101  {
102  m_state->m_node_ahead_flag = true; // Mark that the next ++ should not advance the underlying reader.
103  m_nodes_iter.reset(); // Invalidate this iterator to mark it as the end.
104  }
105  }
106 
    // Private constructor (descendants_view is a friend and creates iterators
    // in begin()).
    //   state       - non-null shared pointer to the iterator state; moved into m_state.
    //   start_depth - depth used as the scope boundary by invalidate_if_out_of_scope().
    // m_nodes_iter is initialised from the begin() of a nodes_view built over m_state.
107  explicit iterator(not_null<std::shared_ptr<iterator_state<safety_level>>, safety_level> state, int start_depth)
108  : m_state(std::move(state)), m_start_depth(start_depth),
109  m_nodes_iter(nodes_view<safety_level>{m_state}.begin())
110  {
111  // If the first node is already out of scope, this iterator is invalid from the start.
112  invalidate_if_out_of_scope();
113  }
114 
    // Depth at or below which a node is treated as out of scope.
116  int m_start_depth;
    // Wrapped iterator over the nodes view that supplies the actual nodes.
117  typename nodes_view<safety_level>::iterator m_nodes_iter;
118 };
119 
// Returns a view of all descendants of the given node.
//
// The view is built from the node's shared iterator state (node.state()) and
// the node's depth (node.depth()), which becomes the view's start depth.
//
// NOTE(review): the declarator line is missing from this listing (the inner
// numbering skips line 128). The page's symbol index gives the signature as
// `descendants_view<safety_level> descendants(const node_ref<safety_level>& node)`
// - confirm against the original header.
127 template <safety_policy safety_level>
129 {
130  return descendants_view<safety_level>{node.state(), node.depth()};
131 }
132 
// NOTE(review): this function template is incomplete in this listing - both
// its declarator and its body are missing (the inner numbering skips lines 141
// and 143). Its name, parameters and behaviour cannot be determined from here;
// consult the original header.
140 template <safety_policy safety_level>
142 {
144 }
145 
146 }
147 
148 #endif
Represents a logging scope.
Definition: log_scope.h:89
A wrapper for pointer-like types that enforces a non-null invariant.
Definition: not_null.h:41
Iterator for recursively traversing descendant nodes.
A view over all descendants of an XML node (recursive).
descendants_view(not_null< std::shared_ptr< iterator_state< safety_level >>, safety_level > state, int depth)
Constructs a view from an iterator state and start depth.
A reference to the current XML node in the reader.
Definition: xml_node_ref.h:33
non_negative< int, safety_level > depth() const
Returns the depth of the node in the XML tree.
Definition: xml_node_ref.h:50
const not_null< std::shared_ptr< iterator_state< safety_level > >, safety_level > & state() const
Returns the shared iterator state associated with this node reference.
Definition: xml_node_ref.h:54
XML processing utilities.
descendants_view< safety_level > descendants(const node_ref< safety_level > &node)
Returns a view of all descendants of the given node.
not_null< std::remove_cvref_t< Ptr > > assume_not_null(Ptr &&ptr)
Wraps a pointer-like object in a not_null, bypassing the runtime check.
Definition: not_null.h:102
A sentinel type used to define the end of a range or view.
Definition: sentinel.h:23
Shared state for XML iterators to coordinate traversal.