Base class for XML-based document parsers (ODF, OOXML, etc.).
More...
#include <common_xml_document_parser.h>
|
| enum | ODFOOXMLListStyle { number
, bullet
} |
| | Enum for list styles (e.g., numbered or bulleted).
|
| |
|
typedef std::vector< ODFOOXMLListStyle > | ListStyleVector |
| | Type alias for a vector of list styles.
|
| |
|
using | ListStyleMap = std::map< std::string, common_xml_document_parser< safety_level >::ListStyleVector > |
| | Type alias for a map of list style names to their definitions.
|
| |
|
using | CommentMap = std::map< int, common_xml_document_parser< safety_level >::comment > |
| | Type alias for a map of comment IDs to Comment objects.
|
| |
|
using | RelationshipMap = std::map< std::string, common_xml_document_parser< safety_level >::relationship > |
| | Type alias for a map of relationship IDs to Relationship objects.
|
| |
|
using | SharedStringVector = std::vector< shared_string > |
| | Type alias for a vector of shared strings.
|
| |
|
typedef std::function< void(xml::node_ref< safety_level > &xml_node, XmlParseMode mode, zip_reader *zipfile, std::string &text, bool &children_processed, std::string &level_suffix, bool first_on_level)> | CommandHandler |
| | Defines the function signature for an XML tag command handler.
|
| |
|
| void | registerODFOOXMLCommandHandler (const std::string &xml_tag, const CommandHandler &handler) |
| | Registers a handler for a specific XML tag. More...
|
| |
| std::string | parseXmlData (xml::children_view< safety_level > xml_nodes, XmlParseMode mode, zip_reader *zipfile) |
| | Parses XML data from a view of nodes. More...
|
| |
| std::string | parseXmlChildren (xml::node_ref< safety_level > &xml_node, XmlParseMode mode, zip_reader *zipfile) |
| | Parses the children of a given XML node. More...
|
| |
| void | extractText (std::string_view xml_contents, XmlParseMode mode, zip_reader *zipfile, std::string &text) |
| | Extracts text from raw XML content. More...
|
| |
| void | parseODFMetadata (std::string_view xml_content, attributes::metadata &metadata) const |
| | Parses ODF metadata from XML content. More...
|
| |
| const std::string | formatComment (const std::string &author, const std::string &time, const std::string &text) |
| | Formats a comment for output. More...
|
| |
|
size_t & | getListDepth () |
| | Returns a reference to the current nesting depth of lists.
|
| |
|
ListStyleMap & | getListStyles () |
| | Gets the map of list styles.
|
| |
|
CommentMap & | getComments () |
| | Gets the map of comments.
|
| |
|
RelationshipMap & | getRelationships () |
| | Gets the map of relationships.
|
| |
|
SharedStringVector & | getSharedStrings () |
| | Gets the vector of shared strings.
|
| |
|
bool | disabledText () const |
| | Checks if text extraction is currently disabled.
|
| |
|
xml::reader_blanks | blanks () const |
| | Gets the current blank node handling policy.
|
| |
|
void | disableText (bool disable) |
| | Enables or disables text extraction.
|
| |
|
void | set_blanks (xml::reader_blanks blanks) |
| | Sets the blank node handling policy for the XML reader.
|
| |
|
void | activeEmittingSignals (bool flag) |
| | Controls whether signal emission (callbacks) is active.
|
| |
|
| common_xml_document_parser () |
| | Default constructor.
|
| |
|
| chain_element (chain_element &&)=default |
| |
|
chain_element & | operator= (chain_element &&)=default |
| |
|
virtual continuation | operator() (message_ptr msg, const message_callbacks &emit_message)=0 |
| |
| virtual bool | is_leaf () const =0 |
| | Checks whether the chain element is a leaf (the last element, which does not produce any messages). Currently, only exporters are leaves. More...
|
| |
|
virtual bool | is_generator () const |
| |
template<safety_policy safety_level = default_safety_level>
class docwire::common_xml_document_parser< safety_level >
Base class for XML-based document parsers (ODF, OOXML, etc.).
Specific parsers (e.g., odf_ooxml_parser, odfxml_parser) inherit from this class. It allows them to register handlers for specific XML tags.
- Template Parameters
-
| safety_level | The safety policy used for XML parsing operations. |
- See also
- xml::reader
-
XML parsing example
Definition at line 41 of file common_xml_document_parser.h.
◆ extractText()
template<safety_policy safety_level = default_safety_level>
Extracts text from raw XML content.
This is a high-level function that initializes the XML reader and calls parseXmlData.
- Parameters
-
| xml_contents | The raw XML string. |
| mode | The parsing mode. |
| zipfile | Pointer to the zip_reader if the XML is part of a zipped archive (e.g., DOCX, ODT). |
| text | Output parameter where the extracted text will be appended. |
◆ formatComment()
template<safety_policy safety_level = default_safety_level>
Formats a comment for output.
- Parameters
-
| author | The author of the comment. |
| time | The timestamp of the comment. |
| text | The content of the comment. |
- Returns
- The formatted comment string.
◆ parseODFMetadata()
template<safety_policy safety_level = default_safety_level>
Parses ODF metadata from XML content.
- Parameters
-
| xml_content | The raw XML content of the metadata file. |
| metadata | The structure to populate with parsed metadata. |
◆ parseXmlChildren()
template<safety_policy safety_level = default_safety_level>
Parses the children of a given XML node.
- Parameters
-
| xml_node | The parent node whose children will be parsed. |
| mode | The parsing mode. |
| zipfile | Pointer to the zip_reader if applicable. |
- Returns
- The extracted text content from the children.
◆ parseXmlData()
template<safety_policy safety_level = default_safety_level>
Parses XML data from a view of nodes.
Iterates through the provided XML nodes and executes registered command handlers.
- Parameters
-
| xml_nodes | The view of XML nodes to parse. |
| mode | The parsing mode (e.g., PARSE_XML, STRIP_XML). |
| zipfile | Pointer to the zip_reader if the XML is part of a zipped archive (e.g., DOCX, ODT). |
- Returns
- The extracted text content.
◆ registerODFOOXMLCommandHandler()
template<safety_policy safety_level = default_safety_level>
Registers a handler for a specific XML tag.
Derived classes can use this to add or override behavior for specific XML tags.
- Parameters
-
| xml_tag | The XML tag name to handle. |
| handler | The function to execute when the tag is encountered. |
The documentation for this class was generated from the following file: