DocWire SDK
DocWire SDK: Award-winning modern data processing in C++20. SourceForge Community Choice & Microsoft support. AI-driven processing. Supports nearly 100 data formats, including email boxes and OCR. Boost efficiency in text extraction, web data extraction, data mining, document analysis. Offline processing possible for security and confidentiality.
ocr_parser.h
1 /*********************************************************************************************************************************************/
2 /* DocWire SDK: Award-winning modern data processing in C++20. SourceForge Community Choice & Microsoft support. AI-driven processing. */
3 /* Supports nearly 100 data formats, including email boxes and OCR. Boost efficiency in text extraction, web data extraction, data mining, */
4 /* document analysis. Offline processing possible for security and confidentiality */
5 /* */
6 /* Copyright (c) SILVERCODERS Ltd, http://silvercoders.com */
7 /* Project homepage: https://github.com/docwire/docwire */
8 /* */
9 /* SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-DocWire-Commercial */
10 /*********************************************************************************************************************************************/
11 
12 #ifndef DOCWIRE_OCR_PARSER_H
13 #define DOCWIRE_OCR_PARSER_H
14 
15 #include "chain_element.h"
16 #include <cstdint>
17 #include "data_source.h"
18 #include <filesystem>
19 #include "language.h"
20 #include "ocr_export.h"
21 #include <optional>
22 #include "pimpl.h"
23 #include <vector>
24 
25 namespace docwire
26 {
27 
28 namespace ocr
29 {
30 
/**
 * Empty tag type in the docwire::ocr namespace; it carries no data members.
 * NOTE(review): presumably used as a marker/notification while OCR is running —
 * its actual use is not visible in this header, confirm against the implementation.
 */
31 struct please_wait {};
32 
33 } // namespace ocr
34 
/**
 * Strongly-typed wrapper around an optional float, used as a constructor
 * argument type of ocr_parser. A default-constructed value ({}) holds an
 * empty optional.
 * NOTE(review): the value range/scale and the behaviour when the optional is
 * empty are not defined in this header — confirm against the implementation.
 */
35 struct ocr_confidence_threshold { std::optional<float> v; };
/**
 * Strongly-typed wrapper around a filesystem path, used as a constructor
 * argument type of ocr_parser. A default-constructed value ({}) holds an
 * empty path.
 * NOTE(review): presumably the location of OCR engine data files, and an empty
 * path presumably selects some default — confirm against the implementation.
 */
36 struct ocr_data_path { std::filesystem::path v; };
/**
 * Strongly-typed wrapper around an optional 32-bit signed integer, used as a
 * constructor argument type of ocr_parser. A default-constructed value ({})
 * holds an empty optional.
 * NOTE(review): the time unit and the behaviour when the optional is empty are
 * not defined in this header — confirm against the implementation.
 */
37 struct ocr_timeout { std::optional<int32_t> v; };
38 
/**
 * OCR parser chain element.
 *
 * Derives from chain_element (so it can be placed in a processing chain) and
 * from with_pimpl<ocr_parser> (implementation details are kept out of this
 * header). The class is exported from the library via DOCWIRE_OCR_EXPORT.
 *
 * Only declarations are visible here; the behaviour of the constructor,
 * operator() and parse() is defined in the implementation file.
 */
39 class DOCWIRE_OCR_EXPORT ocr_parser : public chain_element, public with_pimpl<ocr_parser>
40 {
41 private:
 // NOTE(review): the listing numbering jumps from 41 to 44 here, so this
 // private section may contain declarations that are not shown.
44 
45 public:
46 
 /**
  * Constructs the parser. Every argument has a default, so the parser can be
  * default-constructed.
  *
  * @param languages                     Languages passed to the parser; defaults to an empty list.
  *                                      NOTE(review): meaning of an empty list is not visible here — confirm.
  * @param ocr_confidence_threshold_arg  Optional confidence threshold wrapper; empty by default.
  * @param ocr_timeout_arg               Optional timeout wrapper; empty by default.
  * @param ocr_data_path_arg             Data path wrapper; empty path by default.
  */
47  ocr_parser(const std::vector<Language>& languages = {},
48  ocr_confidence_threshold ocr_confidence_threshold_arg = {},
49  ocr_timeout ocr_timeout_arg = {},
50  ocr_data_path ocr_data_path_arg = {});
51 
 /**
  * Chain element entry point (overrides the base class call operator).
  *
  * @param msg           Incoming message to handle.
  * @param emit_message  Callbacks supplied by the chain.
  *                      NOTE(review): presumably used to emit resulting messages — confirm.
  * @return A continuation value; its possible values are declared outside this header.
  */
52  continuation operator()(message_ptr msg, const message_callbacks& emit_message) override;
53 
 /**
  * Reports whether this chain element is a leaf (the last element, which
  * doesn't produce any messages). Always returns false for ocr_parser.
  */
54  bool is_leaf() const override { return false; }
55 
56 private:
 /**
  * Internal helper taking a data source and a list of languages.
  * NOTE(review): presumably runs OCR on `data` for the given languages; the
  * definition is not visible in this header — confirm.
  */
57  void parse(const data_source& data, const std::vector<Language>& languages);
58 };
59 
60 } // namespace docwire
61 
62 #endif // DOCWIRE_OCR_PARSER_H
bool is_leaf() const override
Check if chain element is a leaf (last element which doesn't produce any messages)....
Definition: ocr_parser.h:54
The main namespace for the DocWire SDK.
Definition: ai_elements.h:19