DocWire SDK
DocWire SDK: Award-winning modern data processing in C++20. SourceForge Community Choice & Microsoft support. AI-driven processing. Supports nearly 100 data formats, including email boxes and OCR. Boost efficiency in text extraction, web data extraction, data mining, document analysis. Offline processing possible for security and confidentiality
content_type.h
1 /*********************************************************************************************************************************************/
2 /* DocWire SDK: Award-winning modern data processing in C++20. SourceForge Community Choice & Microsoft support. AI-driven processing. */
3 /* Supports nearly 100 data formats, including email boxes and OCR. Boost efficiency in text extraction, web data extraction, data mining, */
4 /* document analysis. Offline processing possible for security and confidentiality */
5 /* */
6 /* Copyright (c) SILVERCODERS Ltd, http://silvercoders.com */
7 /* Project homepage: https://github.com/docwire/docwire */
8 /* */
9 /* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-DocWire-Commercial */
10 /*********************************************************************************************************************************************/
11 
12 #ifndef DOCWIRE_CONTENT_TYPE_H
13 #define DOCWIRE_CONTENT_TYPE_H
14 
15 #include "chain_element.h"
16 #include "content_type_by_signature.h"
17 #include "ref_or_owned.h"
18 
39 namespace docwire::content_type
40 {
41 
74 DOCWIRE_CONTENT_TYPE_EXPORT void detect(data_source& data, const by_signature::database& signatures_db_to_use = by_signature::database{});
75 
106 class detector : public chain_element
107 {
108 public:
109 
121  : m_signatures_db_to_use(signatures_db_to_use) {}
122 
123  continuation operator()(message_ptr msg, const message_callbacks& emit_message) override
124  {
125  try
126  {
127  if (msg->is<data_source>())
128  {
129  data_source& data = msg->get<data_source>();
130  content_type::detect(data, m_signatures_db_to_use.get());
131  }
132  else if (msg->is<document::image>())
133  {
134  data_source& data = msg->get<document::image>().source;
135  content_type::detect(data, m_signatures_db_to_use.get());
136  }
137  }
138  catch (const std::exception& e)
139  {
140  emit_message(make_nested_ptr(std::current_exception(), DOCWIRE_MAKE_ERROR("Content type detection failed")));
141  }
142  return emit_message(std::move(msg));
143  }
144 
145  bool is_leaf() const override
146  {
147  return false;
148  }
149 
150 private:
151  ref_or_owned<by_signature::database> m_signatures_db_to_use;
152 };
153 
154 } // namespace docwire::content_type
155 
156 #endif // DOCWIRE_CONTENT_TYPE_H
Content type detection chain element.
Definition: content_type.h:107
bool is_leaf() const override
Check if chain element is a leaf (last element which doesn't produce any messages)....
Definition: content_type.h:145
detector(ref_or_owned< by_signature::database > signatures_db_to_use=by_signature::database{})
Constructs a new detector with the given database of signatures.
Definition: content_type.h:120
A utility class that simplifies declaring function attributes that need to be stored without requirin...
Definition: ref_or_owned.h:34
Provides a multi-stage pipeline for content type detection.
DOCWIRE_CONTENT_TYPE_EXPORT void detect(data_source &data, const by_signature::database &signatures_db_to_use=by_signature::database{})
std::exception_ptr make_nested_ptr(Inner &&inner, Outer &&outer, Rest &&... rest)
Creates a pointer to a nested exception from an inner exception and an outer exception.