12 #ifndef DOCWIRE_COMMON_XML_PARSER_H
13 #define DOCWIRE_COMMON_XML_PARSER_H
15 #include "attributes.h"
16 #include "chain_element.h"
18 #include "xml_children.h"
/// Mode flag passed as the `mode` argument to the XML parsing functions
/// (extractText, parseXmlData, parseXmlChildren and the tag command handlers).
/// NOTE(review): presumably PARSE_XML parses the input as-is, FIX_XML repairs
/// malformed XML first and STRIP_XML only removes markup — confirm against the
/// implementation; the enumerator semantics are not visible here.
28 enum XmlParseMode { PARSE_XML, FIX_XML, STRIP_XML };
40 template <safety_policy safety_level = default_safety_level>
/// Constructs a comment record by copying the given strings into the
/// m_author, m_time and m_text members; the body performs no other work.
/// @param author  Stored in m_author.
/// @param time    Stored in m_time (kept as a string, no parsing is done here).
/// @param text    Stored in m_text.
69 comment(
const std::string& author,
const std::string& time,
const std::string& text)
70 : m_author(author), m_time(time), m_text(text) {}
/// Type alias for a map of list style names to their definitions
/// (each definition is a ListStyleVector).
88 using ListStyleMap = std::map<std::string, common_xml_document_parser<safety_level>::ListStyleVector>;
/// Type alias for a map of integer comment IDs to comment objects.
90 using CommentMap = std::map<int, common_xml_document_parser<safety_level>::comment>;
/// Type alias for a map of relationship IDs (strings) to relationship objects.
92 using RelationshipMap = std::map<std::string, common_xml_document_parser<safety_level>::relationship>;
101 bool& children_processed, std::string& level_suffix,
bool first_on_level)>
CommandHandler;
/// Formats a comment for output.
/// @param author  Author of the comment.
/// @param time    Time of the comment, supplied as a string.
/// @param text    Text of the comment.
/// @return The formatted comment, returned by value as a const std::string.
/// NOTE(review): the exact output layout is defined in the implementation,
/// which is not visible in this declaration.
161 const std::string
formatComment(
const std::string& author,
const std::string& time,
const std::string& text);
Helper class to manage the context stack scope. Pushes a new context on construction and pops it on destruction.
scoped_context_stack_push(common_xml_document_parser &parser, const message_callbacks &emit_message)
Constructs the helper and pushes a new context onto the parser's stack.
~scoped_context_stack_push()
Destructor that pops the context from the parser's stack.
Base class for XML-based document parsers (ODF, OOXML, etc.).
RelationshipMap & getRelationships()
Gets the map of relationships.
size_t & getListDepth()
Returns the current nesting depth of lists.
CommentMap & getComments()
Gets the map of comments.
std::map< std::string, common_xml_document_parser< safety_level >::ListStyleVector > ListStyleMap
Type alias for a map of list style names to their definitions.
void registerODFOOXMLCommandHandler(const std::string &xml_tag, const CommandHandler &handler)
Registers a handler for a specific XML tag.
void activeEmittingSignals(bool flag)
Controls whether signal emission (callbacks) is active.
void disableText(bool disable)
Enables or disables text extraction.
common_xml_document_parser()
Default constructor.
SharedStringVector & getSharedStrings()
Gets the vector of shared strings.
ListStyleMap & getListStyles()
Gets the map of list styles.
const std::string formatComment(const std::string &author, const std::string &time, const std::string &text)
Formats a comment for output.
std::map< int, common_xml_document_parser< safety_level >::comment > CommentMap
Type alias for a map of comment IDs to Comment objects.
bool disabledText() const
Checks if text extraction is currently disabled.
void set_blanks(xml::reader_blanks blanks)
Sets the blank node handling policy for the XML reader.
std::vector< shared_string > SharedStringVector
Type alias for a vector of shared strings.
void parseODFMetadata(std::string_view xml_content, attributes::metadata &metadata) const
Parses ODF metadata from XML content.
ODFOOXMLListStyle
Enum for list styles (e.g., numbered or bulleted).
void extractText(std::string_view xml_contents, XmlParseMode mode, zip_reader *zipfile, std::string &text)
Extracts text from raw XML content.
std::string parseXmlData(xml::children_view< safety_level > xml_nodes, XmlParseMode mode, zip_reader *zipfile)
Parses XML data from a view of nodes.
std::function< void(xml::node_ref< safety_level > &xml_node, XmlParseMode mode, zip_reader *zipfile, std::string &text, bool &children_processed, std::string &level_suffix, bool first_on_level)> CommandHandler
Defines the function signature for an XML tag command handler.
std::map< std::string, common_xml_document_parser< safety_level >::relationship > RelationshipMap
Type alias for a map of relationship IDs to Relationship objects.
std::string parseXmlChildren(xml::node_ref< safety_level > &xml_node, XmlParseMode mode, zip_reader *zipfile)
Parses the children of a given XML node.
std::vector< ODFOOXMLListStyle > ListStyleVector
Type alias for a vector of list styles.
xml::reader_blanks blanks() const
Gets the current blank node handling policy.
A view over the direct children of an XML node.
A reference to the current XML node in the reader.
reader_blanks
Options for handling blank nodes in the XML reader.
The main namespace for the DocWire SDK.
Represents a relationship, typically for hyperlinks or embedded objects.
Represents a shared string, a common optimization in OOXML formats.