This example demonstrates how to create embeddings for a document and queries using a local AI model and then calculate their similarity.
#include "docwire.h"
#include <iostream>
#include <vector>
#include <string>
#include <filesystem>
/**
 * Example entry point: embeds a document (the "passage") and three queries,
 * then compares the passage embedding with each query embedding using
 * cosine similarity.
 *
 * @param argc Number of command-line arguments (not used).
 * @param argv Command-line arguments (not used).
 * @return 0 when every step completes, 1 when a std::exception is caught.
 */
int main(int argc, char* argv[])
{
    try
    {
        // Passage: detect the file type, parse the office document, export it
        // as plain text and feed that text to the local embedding model using
        // the E5 passage prefix. Resulting messages are collected in the vector.
        std::vector<message_ptr> passage_msgs;
        std::filesystem::path("data_processing_definition.doc")
            | content_type::detector{}
            | office_formats_parser{}
            | plain_text_exporter()
            | local_ai::embed(local_ai::embed::e5_passage_prefix)
            | passage_msgs;
        ensure(passage_msgs.size()) == 1;
        ensure(passage_msgs[0]->is<ai::embedding>()) == true;
        auto passage_embedding = passage_msgs[0]->get<ai::embedding>();
        // The example expects an embedding vector with exactly 384 values.
        ensure(passage_embedding.values.size()) == 384;

        // NOTE(review): nothing visible here fills similar_query_msgs before it
        // is checked. The pipeline that embeds the query text (presumably with
        // local_ai::embed::e5_query_prefix) appears to be missing - restore it,
        // otherwise the size check below cannot hold.
        std::vector<message_ptr> similar_query_msgs;
        ensure(similar_query_msgs.size()) == 1;
        ensure(similar_query_msgs[0]->is<ai::embedding>()) == true;
        auto similar_query_embedding = similar_query_msgs[0]->get<ai::embedding>();

        // NOTE(review): same gap as above - the pipeline producing the
        // partially related query embedding is not present.
        std::vector<message_ptr> partial_query_msgs;
        ensure(partial_query_msgs.size()) == 1;
        ensure(partial_query_msgs[0]->is<ai::embedding>()) == true;
        auto partial_query_embedding = partial_query_msgs[0]->get<ai::embedding>();

        // NOTE(review): same gap as above - the pipeline producing the
        // unrelated query embedding is not present.
        std::vector<message_ptr> dissimilar_query_msgs;
        ensure(dissimilar_query_msgs.size()) == 1;
        ensure(dissimilar_query_msgs[0]->is<ai::embedding>()) == true;
        auto dissimilar_query_embedding = dissimilar_query_msgs[0]->get<ai::embedding>();

        // Compare the passage with each query.
        double sim =
            cosine_similarity(passage_embedding.values, similar_query_embedding.values);
        double partial_sim =
            cosine_similarity(passage_embedding.values, partial_query_embedding.values);
        double dissim =
            cosine_similarity(passage_embedding.values, dissimilar_query_embedding.values);

        // Show the scores instead of discarding them, so the example actually
        // demonstrates its result.
        std::cout << "similar query similarity: " << sim << '\n'
                  << "partial query similarity: " << partial_sim << '\n'
                  << "dissimilar query similarity: " << dissim << '\n';
    }
    catch (const std::exception& e)
    {
        // Report the failure instead of exiting silently with a bare status code.
        // TODO(review): the SDK's diagnostic_message(e) may give a fuller report
        // for nested exception chains - confirm its namespace and switch to it.
        std::cerr << "Error: " << e.what() << '\n';
        return 1;
    }
    return 0;
}
static const std::string e5_query_prefix
Common prefix for query embeddings with E5 models.
static const std::string e5_passage_prefix
Common prefix for passage embeddings with E5 models.
DOCWIRE_CORE_EXPORT std::string diagnostic_message(const std::exception &e)
Generates a diagnostic message for the given nested exceptions chain.
The main namespace for the DocWire SDK.
ensure(const T &, const docwire::source_location &) -> ensure< T >
Deduction guide for the ensure class template.
DOCWIRE_CORE_EXPORT double cosine_similarity(const std::vector< double > &a, const std::vector< double > &b)
Calculates the cosine similarity between two vectors.