12 #ifndef DOCWIRE_DATA_SOURCE_H
13 #define DOCWIRE_DATA_SOURCE_H
15 #include "core_export.h"
16 #include "file_extension.h"
20 #include "memory_buffer.h"
22 #include <string_view>
23 #include "unique_identifier.h"
24 #include <unordered_map>
34 std::shared_ptr<std::istream> v;
40 std::shared_ptr<std::istream> v;
53 bool operator==(
const mime_type& rhs)
const =
default;
67 return hash<std::string>{}(mt.v);
93 std::is_same_v<T, std::filesystem::path> ||
94 std::is_same_v<T, std::vector<std::byte>> ||
95 std::is_same_v<T, std::span<const std::byte>> ||
96 std::is_same_v<T, std::string> ||
97 std::is_same_v<T, std::string_view> ||
98 std::is_same_v<T, seekable_stream_ptr> ||
99 std::is_same_v<T, unseekable_stream_ptr>;
104 template <
typename T>
111 template<
class... Ts>
134 template <data_source_compatible_type T>
143 template <data_source_compatible_type T>
145 : m_source{std::move(source)}
153 template <data_source_compatible_type T>
163 template <data_source_compatible_type T>
174 template <data_source_compatible_type T>
178 add_mime_type(
mime_type, mime_type_confidence);
187 template <data_source_compatible_type T>
189 : m_source{std::move(source)}
191 add_mime_type(
mime_type, mime_type_confidence);
199 std::span<const std::byte>
span(std::optional<length_limit> limit = std::nullopt)
const;
206 std::string
string(std::optional<length_limit> limit = std::nullopt)
const;
216 std::string_view
string_view(std::optional<length_limit> limit = std::nullopt)
const;
219 std::shared_ptr<std::istream>
istream()
const;
222 std::optional<std::filesystem::path>
path()
const;
242 auto hc_mt_it = std::max_element(mime_types.begin(), mime_types.end(),
243 [](
const auto& p1,
const auto& p2)
245 if (p1.second != p2.second)
246 return p1.second < p2.second;
249 return p1.first.v > p2.first.v;
251 if (hc_mt_it != mime_types.end())
260 auto hc_mt = highest_confidence_mime_type_info();
270 auto hc_mt = highest_confidence_mime_type_info();
272 return hc_mt->second;
274 return confidence::none;
288 auto mt_iter = mime_types.find(mt);
289 if (mt_iter == mime_types.end())
290 return confidence::none;
292 return mt_iter->second;
302 auto [existing_it, inserted] = mime_types.try_emplace(mt, c);
303 if (!inserted && existing_it->second < c)
304 existing_it->second = c;
312 std::optional<docwire::file_extension> m_file_extension;
313 mutable std::shared_ptr<memory_buffer> m_memory_cache;
314 mutable std::shared_ptr<std::istream> m_path_stream;
315 mutable std::optional<size_t> m_stream_size;
318 void fill_memory_cache(std::optional<length_limit> limit)
const;
std::string string(std::optional< length_limit > limit=std::nullopt) const
Returns the content as a string.
data_source(T &&source, file_extension file_extension)
Constructs a data_source by moving, with an explicit file extension.
data_source(T &&source)
Constructs a data_source by moving from a compatible type.
data_source(const T &source, file_extension file_extension)
Constructs a data_source with an explicit file extension.
confidence highest_mime_type_confidence() const
Returns the highest confidence level found among detected MIME types.
bool has_highest_confidence_mime_type_in(const std::vector< mime_type > &mts) const
Checks whether the highest-confidence MIME type is present in the given list.
confidence mime_type_confidence(mime_type mt) const
Returns the confidence level for a specific MIME type.
std::span< const std::byte > span(std::optional< length_limit > limit=std::nullopt) const
Returns the content as a span of bytes.
void assert_not_encrypted() const
Asserts that the data source is not encrypted.
void add_mime_type(mime_type mt, confidence c)
Adds a MIME type with a confidence level; if the type is already present, the higher of the two confidence levels is kept.
unique_identifier id() const
Returns the unique identifier for this data source.
std::optional< std::pair< mime_type, confidence > > highest_confidence_mime_type_info() const
Returns the MIME type with the highest confidence and its confidence level.
std::optional< docwire::file_extension > file_extension() const
Returns the file extension if available.
std::shared_ptr< std::istream > istream() const
Returns an input stream for reading the data.
data_source(const T &source, mime_type mime_type, confidence mime_type_confidence)
Constructs a data_source with an initial MIME type and confidence.
std::unordered_map< mime_type, confidence > mime_types
Map of detected MIME types and their confidence levels.
std::optional< std::filesystem::path > path() const
Returns the file path if the source is a file, otherwise std::nullopt.
std::string_view string_view(std::optional< length_limit > limit=std::nullopt) const
Returns the content as a string_view.
data_source(const T &source)
Constructs a data_source from a compatible type.
std::optional< mime_type > highest_confidence_mime_type() const
Returns the MIME type with the highest confidence.
data_source(T &&source, mime_type mime_type, confidence mime_type_confidence)
Constructs a data_source by moving, with an initial MIME type and confidence.
A class representing a file extension.
Represents an identifier of an object that is unique within a single program run.
The main namespace for the DocWire SDK.
concept data_source_compatible_type
Concept matching types that can be used to initialize a data_source.
concept data_source_compatible_type_ref_qualified
Concept matching reference-qualified types compatible with data_source.
confidence
Represents the confidence level of a detected MIME type.
Wrapper for a length limit value.
Wrapper for a MIME type string.
A helper for creating a visitor from a set of lambdas, used for visiting std::variant.
Wrapper for a shared pointer to a seekable input stream.
Wrapper for a shared pointer to an unseekable input stream.