From cd96be73a375bea8ec5d8dd48ff3b5299dae6f9e Mon Sep 17 00:00:00 2001 From: Sam Malayek Date: Sun, 12 Oct 2025 12:42:17 -0700 Subject: [PATCH 1/5] Add --embd-output-format raw for plain numeric embedding output This new option outputs embeddings as raw space-separated floats, without JSON or 'embedding N:' prefixes. Useful for downstream vector pipelines and scripting. --- examples/embedding/embedding.cpp | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp index 388908bc4d70a..11b44857a9856 100644 --- a/examples/embedding/embedding.cpp +++ b/examples/embedding/embedding.cpp @@ -4,6 +4,7 @@ #include "llama.h" #include +#include #include #if defined(_MSC_VER) @@ -70,6 +71,29 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * outpu } } +// plain, pipe-friendly output: one embedding per line +static void print_raw_embeddings(const float * emb, + int n_embd_count, + int n_embd, + const llama_model * model, + enum llama_pooling_type pooling_type, + int embd_normalize) { + const uint32_t n_cls_out = llama_model_n_cls_out(model); + const bool is_rank = (pooling_type == LLAMA_POOLING_TYPE_RANK); + const int cols = is_rank ? std::min(n_embd, (int) n_cls_out) : n_embd; + + for (int j = 0; j < n_embd_count; ++j) { + for (int i = 0; i < cols; ++i) { + if (embd_normalize == 0) { + printf("%1.0f%s", emb[j * n_embd + i], (i + 1 < cols ? " " : "")); + } else { + printf("%1.7f%s", emb[j * n_embd + i], (i + 1 < cols ? " " : "")); + } + } + printf("\n"); + } +} + int main(int argc, char ** argv) { common_params params; @@ -259,6 +283,10 @@ int main(int argc, char ** argv) { float * out = emb + e * n_embd; batch_decode(ctx, batch, out, s, n_embd, params.embd_normalize); + if (params.embd_out == "raw") { + print_raw_embeddings(emb, n_embd_count, n_embd, model, pooling_type, params.embd_normalize); + } + if (params.embd_out.empty()) { LOG("\n"); From c66712074ccf7a409ecbacf31f070035d9b46fc3 Mon Sep 17 00:00:00 2001 From: Sam Malayek Date: Mon, 13 Oct 2025 11:33:41 -0700 Subject: [PATCH 2/5] Move raw output handling into format handling section --- examples/embedding/embedding.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp index 11b44857a9856..8b25fcdb4fe7a 100644 --- a/examples/embedding/embedding.cpp +++ b/examples/embedding/embedding.cpp @@ -283,10 +283,6 @@ int main(int argc, char ** argv) { float * out = emb + e * n_embd; batch_decode(ctx, batch, out, s, n_embd, params.embd_normalize); - if (params.embd_out == "raw") { - print_raw_embeddings(emb, n_embd_count, n_embd, model, pooling_type, params.embd_normalize); - } - if (params.embd_out.empty()) { LOG("\n"); @@ -402,6 +398,10 @@ int main(int argc, char ** argv) { if (notArray) LOG("\n}\n"); } + if (params.embd_out == "raw") { + print_raw_embeddings(emb, n_embd_count, n_embd, model, pooling_type, params.embd_normalize); + } + LOG("\n"); llama_perf_context_print(ctx); From 883e07aa854a531bfe7c887b0eaae19b5144de29 Mon Sep 17 00:00:00 2001 From: Sam Malayek Date: Tue, 21 Oct 2025 22:56:44 -0700 Subject: [PATCH 3/5] Move raw output handling into else-if block with other format handlers --- examples/embedding/embedding.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp index 8b25fcdb4fe7a..84f929480eaf0 100644 --- a/examples/embedding/embedding.cpp +++ b/examples/embedding/embedding.cpp @@ -396,9 +396,7 @@ int main(int argc, char ** argv) { } if (notArray) LOG("\n}\n"); - } - - if (params.embd_out == "raw") { + } else if (params.embd_out == "raw") { print_raw_embeddings(emb, n_embd_count, n_embd, model, pooling_type, params.embd_normalize); } From ce7b1879b7ce8383fe44a7ba9c102855e32d3db1 Mon Sep 17 00:00:00 2001 From: Sam Malayek Date: Mon, 27 Oct 2025 13:10:39 -0700 Subject: [PATCH 4/5] Use LOG instead of printf for raw embedding output --- examples/embedding/embedding.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp index 84f929480eaf0..9e3ab5905bb37 100644 --- a/examples/embedding/embedding.cpp +++ b/examples/embedding/embedding.cpp @@ -4,7 +4,6 @@ #include "llama.h" #include -#include #include #if defined(_MSC_VER) @@ -85,12 +84,12 @@ static void print_raw_embeddings(const float * emb, for (int j = 0; j < n_embd_count; ++j) { for (int i = 0; i < cols; ++i) { if (embd_normalize == 0) { - printf("%1.0f%s", emb[j * n_embd + i], (i + 1 < cols ? " " : "")); + LOG("%1.0f%s", emb[j * n_embd + i], (i + 1 < cols ? " " : "")); } else { - printf("%1.7f%s", emb[j * n_embd + i], (i + 1 < cols ? " " : "")); + LOG("%1.7f%s", emb[j * n_embd + i], (i + 1 < cols ? " " : "")); } } - printf("\n"); + LOG("\n"); } } From 252563dd16e0361587835e114c455a249151e392 Mon Sep 17 00:00:00 2001 From: Sam Malayek Date: Tue, 28 Oct 2025 02:11:33 -0700 Subject: [PATCH 5/5] docs: document 'raw' embedding output format in arg.cpp and README --- common/arg.cpp | 2 +- examples/embedding/README.md | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/common/arg.cpp b/common/arg.cpp index c0b718071127d..b2af64dc3eed4 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -3248,7 +3248,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex ).set_examples({LLAMA_EXAMPLE_EMBEDDING})); add_opt(common_arg( {"--embd-output-format"}, "FORMAT", - "empty = default, \"array\" = [[],[]...], \"json\" = openai style, \"json+\" = same \"json\" + cosine similarity matrix", + "empty = default, \"array\" = [[],[]...], \"json\" = openai style, \"json+\" = same \"json\" + cosine similarity matrix, \"raw\" = plain whitespace-delimited output (one embedding per line)", [](common_params & params, const std::string & value) { params.embd_out = value; } diff --git a/examples/embedding/README.md b/examples/embedding/README.md index 3dd279d9fc41a..1684f36480d82 100644 --- a/examples/embedding/README.md +++ b/examples/embedding/README.md @@ -38,6 +38,7 @@ The above command will output space-separated float values. | | multiple embeddings | $[[x_1,...,x_n],[x_1,...,x_n],...,[x_1,...,x_n]]$ | 'json' | openai style | | 'json+' | add cosine similarity matrix | +| 'raw' | plain text output | ### --embd-separator $"string"$ | $"string"$ | |