DocWire SDK
DocWire SDK: Award-winning modern data processing in C++20. SourceForge Community Choice & Microsoft support. AI-driven processing. Supports nearly 100 data formats, including email boxes and OCR. Boost efficiency in text extraction, web data extraction, data mining, document analysis. Offline processing possible for security and confidentiality
content_type_by_signature.h
1 /*********************************************************************************************************************************************/
2 /* DocWire SDK: Award-winning modern data processing in C++20. SourceForge Community Choice & Microsoft support. AI-driven processing. */
3 /* Supports nearly 100 data formats, including email boxes and OCR. Boost efficiency in text extraction, web data extraction, data mining, */
4 /* document analysis. Offline processing possible for security and confidentiality */
5 /* */
6 /* Copyright (c) SILVERCODERS Ltd, http://silvercoders.com */
7 /* Project homepage: https://github.com/docwire/docwire */
8 /* */
9 /* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-DocWire-Commercial */
10 /*********************************************************************************************************************************************/
11 
12 #ifndef DOCWIRE_CONTENT_TYPE_BY_SIGNATURE_H
13 #define DOCWIRE_CONTENT_TYPE_BY_SIGNATURE_H
14 
15 #include "chain_element.h"
16 #include "content_type_export.h"
17 #include "data_source.h"
18 #include "document_elements.h"
19 #include "make_error.h"
20 #include "nested_exception.h"
21 #include "ref_or_owned.h"
22 
40 {
41 
43 {
44  bool v;
45 };
46 
58 class DOCWIRE_CONTENT_TYPE_EXPORT database : public with_pimpl<database>
59 {
60 public:
61  explicit database();
62  friend DOCWIRE_CONTENT_TYPE_EXPORT void detect(data_source& data, const database& database_to_use, allow_multiple allow_multiple);
63 };
64 
81 DOCWIRE_CONTENT_TYPE_EXPORT void detect(data_source& data, const database& database_to_use = database{}, allow_multiple allow_multiple = {false});
82 
92 class detector : public chain_element
93 {
94 public:
95 
109  explicit detector(ref_or_owned<database> database_to_use = database{}, allow_multiple allow_multiple = {false})
110  : m_database_to_use(database_to_use), m_allow_multiple{allow_multiple} {}
111 
112  continuation operator()(message_ptr msg, const message_callbacks& emit_message) override
113  {
114  try
115  {
116  if (msg->is<data_source>())
117  {
118  data_source& data = msg->get<data_source>();
119  detect(data, m_database_to_use.get(), m_allow_multiple);
120  }
121  else if (msg->is<document::image>())
122  {
123  data_source& data = msg->get<document::image>().source;
124  detect(data, m_database_to_use.get(), m_allow_multiple);
125  }
126  }
127  catch (const std::exception& e)
128  {
129  emit_message(make_nested_ptr(std::current_exception(), DOCWIRE_MAKE_ERROR("Content type detection by signature failed")));
130  }
131  return emit_message(std::move(msg));
132  }
133 
134  bool is_leaf() const override
135  {
136  return false;
137  }
138 
139 private:
140  ref_or_owned<database> m_database_to_use;
141  allow_multiple m_allow_multiple;
142 };
143 
144 } // namespace docwire::content_type::by_signature
145 
146 #endif // DOCWIRE_CONTENT_TYPE_BY_SIGNATURE_H
friend DOCWIRE_CONTENT_TYPE_EXPORT void detect(data_source &data, const database &database_to_use, allow_multiple allow_multiple)
Detects and assigns content types to the provided data source using signature-based content detection…
detector(ref_or_owned< database > database_to_use=database{}, allow_multiple allow_multiple={false})
Constructs a new detector with the given database of signatures and multiple content type detection c...
bool is_leaf() const override
Checks whether the chain element is a leaf (the last element, which doesn't produce any messages).
A utility class that simplifies declaring function attributes that need to be stored without requirin...
Definition: ref_or_owned.h:34
Provides content type detection based on file signatures (magic bytes).
DOCWIRE_CONTENT_TYPE_EXPORT void detect(data_source &data, const database &database_to_use=database{}, allow_multiple allow_multiple={false})
Detects and assigns content types to the provided data source using signature-based content detection…
std::exception_ptr make_nested_ptr(Inner &&inner, Outer &&outer, Rest &&... rest)
Creates a pointer to a nested exception from an inner exception and an outer exception.