From cd96be73a375bea8ec5d8dd48ff3b5299dae6f9e Mon Sep 17 00:00:00 2001
From: Sam Malayek <malayek@gmail.com>
Date: Sun, 12 Oct 2025 12:42:17 -0700
Subject: [PATCH 1/5] Add --embd-output-format raw for plain numeric embedding
 output

Add a 'raw' value for --embd-output-format that prints each embedding on its own line as space-separated floats, without JSON wrapping or 'embedding N:' prefixes. This is useful for downstream vector pipelines and scripting.
---
 examples/embedding/embedding.cpp | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)
diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 388908bc4d70a..11b44857a9856 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -4,6 +4,7 @@
 #include "llama.h"
 
 #include <ctime>
+#include <cstdio>
 #include <algorithm>
 
 #if defined(_MSC_VER)
@@ -70,6 +71,29 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * outpu
     }
 }
 
+// plain, pipe-friendly output: one embedding per line, values separated by single spaces (rank pooling: at most n_cls_out values per line)
+static void print_raw_embeddings(const float * emb,
+                                 int n_embd_count,
+                                 int n_embd,
+                                 const llama_model * model,
+                                 enum llama_pooling_type pooling_type,
+                                 int embd_normalize) {
+    const uint32_t n_cls_out = llama_model_n_cls_out(model);
+    const bool is_rank = (pooling_type == LLAMA_POOLING_TYPE_RANK);
+    const int cols = is_rank ? std::min<int>(n_embd, (int) n_cls_out) : n_embd;
+
+    for (int j = 0; j < n_embd_count; ++j) {
+        for (int i = 0; i < cols; ++i) {
+            if (embd_normalize == 0) {
+                printf("%1.0f%s", emb[j * n_embd + i], (i + 1 < cols ? " " : ""));
+            } else {
+                printf("%1.7f%s", emb[j * n_embd + i], (i + 1 < cols ? " " : ""));
+            }
+        }
+        printf("\n");
+    }
+}
+
 int main(int argc, char ** argv) {
     common_params params;
 
@@ -259,6 +283,10 @@ int main(int argc, char ** argv) {
     float * out = emb + e * n_embd;
     batch_decode(ctx, batch, out, s, n_embd, params.embd_normalize);
 
+    if (params.embd_out == "raw") {
+        print_raw_embeddings(emb, n_embd_count, n_embd, model, pooling_type, params.embd_normalize);
+    }
+
     if (params.embd_out.empty()) {
         LOG("\n");
 

From c66712074ccf7a409ecbacf31f070035d9b46fc3 Mon Sep 17 00:00:00 2001
From: Sam Malayek <malayek@gmail.com>
Date: Mon, 13 Oct 2025 11:33:41 -0700
Subject: [PATCH 2/5] Move raw output handling into format handling section

---
 examples/embedding/embedding.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 11b44857a9856..8b25fcdb4fe7a 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -283,10 +283,6 @@ int main(int argc, char ** argv) {
     float * out = emb + e * n_embd;
     batch_decode(ctx, batch, out, s, n_embd, params.embd_normalize);
 
-    if (params.embd_out == "raw") {
-        print_raw_embeddings(emb, n_embd_count, n_embd, model, pooling_type, params.embd_normalize);
-    }
-
     if (params.embd_out.empty()) {
         LOG("\n");
 
@@ -402,6 +398,10 @@ int main(int argc, char ** argv) {
         if (notArray) LOG("\n}\n");
     }
 
+    if (params.embd_out == "raw") {
+        print_raw_embeddings(emb, n_embd_count, n_embd, model, pooling_type, params.embd_normalize);
+    }
+
     LOG("\n");
     llama_perf_context_print(ctx);
 

From 883e07aa854a531bfe7c887b0eaae19b5144de29 Mon Sep 17 00:00:00 2001
From: Sam Malayek <malayek@gmail.com>
Date: Tue, 21 Oct 2025 22:56:44 -0700
Subject: [PATCH 3/5] Move raw output handling into else-if block with other
 format handlers

---
 examples/embedding/embedding.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 8b25fcdb4fe7a..84f929480eaf0 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -396,9 +396,7 @@ int main(int argc, char ** argv) {
         }
 
         if (notArray) LOG("\n}\n");
-    }
-
-    if (params.embd_out == "raw") {
+    } else if (params.embd_out == "raw") {
         print_raw_embeddings(emb, n_embd_count, n_embd, model, pooling_type, params.embd_normalize);
     }
 

From ce7b1879b7ce8383fe44a7ba9c102855e32d3db1 Mon Sep 17 00:00:00 2001
From: Sam Malayek <malayek@gmail.com>
Date: Mon, 27 Oct 2025 13:10:39 -0700
Subject: [PATCH 4/5] Use LOG instead of printf for raw embedding output

---
 examples/embedding/embedding.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 84f929480eaf0..9e3ab5905bb37 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -4,7 +4,6 @@
 #include "llama.h"
 
 #include <ctime>
-#include <cstdio>
 #include <algorithm>
 
 #if defined(_MSC_VER)
@@ -85,12 +84,12 @@ static void print_raw_embeddings(const float * emb,
     for (int j = 0; j < n_embd_count; ++j) {
         for (int i = 0; i < cols; ++i) {
             if (embd_normalize == 0) {
-                printf("%1.0f%s", emb[j * n_embd + i], (i + 1 < cols ? " " : ""));
+                LOG("%1.0f%s", emb[j * n_embd + i], (i + 1 < cols ? " " : ""));
             } else {
-                printf("%1.7f%s", emb[j * n_embd + i], (i + 1 < cols ? " " : ""));
+                LOG("%1.7f%s", emb[j * n_embd + i], (i + 1 < cols ? " " : ""));
             }
         }
-        printf("\n");
+        LOG("\n");
     }
 }
 

From 252563dd16e0361587835e114c455a249151e392 Mon Sep 17 00:00:00 2001
From: Sam Malayek <malayek@gmail.com>
Date: Tue, 28 Oct 2025 02:11:33 -0700
Subject: [PATCH 5/5] docs: document 'raw' embedding output format in arg.cpp
 and README

---
 common/arg.cpp               | 2 +-
 examples/embedding/README.md | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/common/arg.cpp b/common/arg.cpp
index c0b718071127d..b2af64dc3eed4 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -3248,7 +3248,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_EMBEDDING}));
     add_opt(common_arg(
         {"--embd-output-format"}, "FORMAT",
-        "empty = default, \"array\" = [[],[]...], \"json\" = openai style, \"json+\" = same \"json\" + cosine similarity matrix",
+        "empty = default, \"array\" = [[],[]...], \"json\" = openai style, \"json+\" = same \"json\" + cosine similarity matrix, \"raw\" = plain whitespace-delimited output (one embedding per line)",
         [](common_params & params, const std::string & value) {
             params.embd_out = value;
         }
diff --git a/examples/embedding/README.md b/examples/embedding/README.md
index 3dd279d9fc41a..1684f36480d82 100644
--- a/examples/embedding/README.md
+++ b/examples/embedding/README.md
@@ -38,6 +38,7 @@ The above command will output space-separated float values.
 |            | multiple embeddings          | $[[x_1,...,x_n],[x_1,...,x_n],...,[x_1,...,x_n]]$
 | 'json'     | openai style                 |
 | 'json+'    | add cosine similarity matrix |
+| 'raw'      | plain whitespace-delimited output (one embedding per line) |
 
 ### --embd-separator $"string"$
 | $"string"$   | |