elastic · mayya-sharipova · Mar 20, 2019 · cbuescher · Mar 20, 2019 · mayya-sharipova
diff --git a/docs/reference/query-dsl/script-score-query.asciidoc b/docs/reference/query-dsl/script-score-query.asciidoc
@@ -173,6 +173,110 @@ between a given query vector and document vectors.
 --------------------------------------------------
 // NOTCONSOLE
 
+For dense_vector fields, `l1norm` calculates L^1^ distance
+(Manhattan distance) between a given query vector and
+document vectors.
+
+[source,js]
+--------------------------------------------------
+{
+  "query": {
+    "script_score": {
+      "query": {
+        "match_all": {}
+      },
+      "script": {
+        "source": "l1norm(params.queryVector, doc['my_dense_vector'])",
+        "params": {
+          "queryVector": [4, 3.4, -0.2]
+        }
+      }
+    }
+  }
+}
+--------------------------------------------------
+// NOTCONSOLE
+
+Note that, unlike `cosineSimilarity` that represents
+similarity, `l1norm` and `l2norm` (shown below) represent distances or
+differences. This means that the more similar two vectors are,
+the lower the scores produced by the `l1norm` and `l2norm` functions will be.
+Thus, if you need more similar vectors to score higher, you should
+reverse the output from `l1norm` and `l2norm`:
+
+`"source": "1 / (1 + l1norm(params.queryVector, doc['my_dense_vector']))"`
+
+Adding 1 to the denominator avoids a division by zero when the distance is 0.
+
+For sparse_vector fields, `l1normSparse` calculates L^1^ distance
+between a given query vector and document vectors.
+
+[source,js]
+--------------------------------------------------
+{
+  "query": {
+    "script_score": {
+      "query": {
+        "match_all": {}
+      },
+      "script": {
+        "source": "l1normSparse(params.queryVector, doc['my_sparse_vector'])",
+        "params": {
+          "queryVector": {"2": 0.5, "10" : 111.3, "50": -1.3, "113": 14.8, "4545": 156.0}
+        }
+      }
+    }
+  }
+}
+--------------------------------------------------
+// NOTCONSOLE
+
+For dense_vector fields, `l2norm` calculates L^2^ distance
+(Euclidean distance) between a given query vector and
+document vectors.
+
+[source,js]
+--------------------------------------------------
+{
+  "query": {
+    "script_score": {
+      "query": {
+        "match_all": {}
+      },
+      "script": {
+        "source": "l2norm(params.queryVector, doc['my_dense_vector'])",
+        "params": {
+          "queryVector": [4, 3.4, -0.2]
+        }
+      }
+    }
+  }
+}
+--------------------------------------------------
+// NOTCONSOLE
+
+Similarly, for sparse_vector fields, `l2normSparse` calculates L^2^ distance
+between a given query vector and document vectors.
+
+[source,js]
+--------------------------------------------------
+{
+  "query": {
+    "script_score": {
+      "query": {
+        "match_all": {}
+      },
+      "script": {
+        "source": "l2normSparse(params.queryVector, doc['my_sparse_vector'])",
+        "params": {
+          "queryVector": {"2": 0.5, "10" : 111.3, "50": -1.3, "113": 14.8, "4545": 156.0}
+        }
+      }
+    }
+  }
+}
+--------------------------------------------------
+// NOTCONSOLE
+
+
 NOTE: If a document doesn't have a value for a vector field on which
 a vector function is executed, 0 is returned as a result
 for this document.

diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/ScoreScriptUtils.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/query/ScoreScriptUtils.java
@@ -32,6 +32,53 @@ public class ScoreScriptUtils {
 
     //**************FUNCTIONS FOR DENSE VECTORS
 
+    /**
+     * Computes the l1 norm (Manhattan distance) between the query's dense vector
+     * and the dense vector stored for the current document.
+     *
+     * @param queryVector the query vector parsed as {@code List<Number>} from json
+     * @param dvs VectorScriptDocValues representing encoded documents' vectors
+     * @return sum of absolute differences over the leading dimensions both vectors have, or 0 when the encoded value is null
+     */
+    public static double l1norm(List<Number> queryVector, VectorScriptDocValues.DenseVectorScriptDocValues dvs) {
+        final BytesRef encoded = dvs.getEncodedValue();
+        if (encoded == null) {
+            return 0;
+        }
+        final float[] docVector = VectorEncoderDecoder.decodeDenseVector(encoded);
+        final int sharedDims = Math.min(queryVector.size(), docVector.length);
+        final Iterator<Number> queryValues = queryVector.iterator();
+        double distance = 0;
+        for (int i = 0; i < sharedDims; i++) {
+            distance += Math.abs(queryValues.next().doubleValue() - docVector[i]);
+        }
+        return distance;
+    }
+
+    /**
+     * Computes the l2 norm (Euclidean distance) between the query's dense vector
+     * and the dense vector stored for the current document.
+     *
+     * @param queryVector the query vector parsed as {@code List<Number>} from json
+     * @param dvs VectorScriptDocValues representing encoded documents' vectors
+     * @return square root of the summed squared differences over the leading dimensions both vectors have, or 0 for a null value
+     */
+    public static double l2norm(List<Number> queryVector, VectorScriptDocValues.DenseVectorScriptDocValues dvs) {
+        final BytesRef encoded = dvs.getEncodedValue();
+        if (encoded == null) {
+            return 0;
+        }
+        final float[] docVector = VectorEncoderDecoder.decodeDenseVector(encoded);
+        final int sharedDims = Math.min(queryVector.size(), docVector.length);
+        final Iterator<Number> queryValues = queryVector.iterator();
+        double sumOfSquares = 0;
+        for (int i = 0; i < sharedDims; i++) {
+            final double delta = queryValues.next().doubleValue() - docVector[i];
+            sumOfSquares += delta * delta;
+        }
+        return Math.sqrt(sumOfSquares);
+    }
+
     /**
      * Calculate a dot product between a query's dense vector and documents' dense vectors
      *
@@ -100,6 +147,122 @@ private static double intDotProduct(List<Number> v1, float[] v2){
 
 
     //**************FUNCTIONS FOR SPARSE VECTORS
+    /**
+     * Calculate l1 norm - Manhattan distance
+     * between a query's sparse vector and documents' sparse vectors.
+     * A dimension that is absent from one of the two vectors
+     * is treated as having the value 0 in that vector.
+     *
+     * L1NormSparse is implemented as a class to use
+     * painless script caching to prepare queryVector
+     * only once per script execution for all documents.
+     * A user will call `l1normSparse(params.queryVector, doc['my_vector'])`
+     */
+    public static final class L1NormSparse {
+        final double[] queryValues;
+        final int[] queryDims;
+
+        /**
+         * Prepares queryVector once per script execution.
+         *
+         * @param queryVector a map of dimensions (integers given as strings) to values
+         * @throws IllegalArgumentException if a dimension key cannot be parsed as an integer
+         */
+        public L1NormSparse(Map<String, Number> queryVector) {
+            //break vector into two arrays dims and values
+            int n = queryVector.size();
+            queryDims = new int[n];
+            queryValues = new double[n];
+            int i = 0;
+            for (Map.Entry<String, Number> dimValue : queryVector.entrySet()) {
+                try {
+                    queryDims[i] = Integer.parseInt(dimValue.getKey());
+                } catch (final NumberFormatException e) {
+                    throw new IllegalArgumentException("Failed to parse a query vector dimension, it must be an integer!", e);
+                }
+                queryValues[i] = dimValue.getValue().doubleValue();
+                i++;
+            }
+            // Sort dimensions in the ascending order and sort values in the same order as their corresponding dimensions
+            sortSparseDimsDoubleValues(queryDims, queryValues, n);
+        }
+
+        /**
+         * Computes the l1 norm of the difference between the query vector and the document's vector.
+         *
+         * @param dvs doc values giving access to the document's encoded sparse vector
+         * @return the sum of absolute differences over the dimensions of either vector, or 0 when the encoded value is null
+         */
+        public double l1normSparse(VectorScriptDocValues.SparseVectorScriptDocValues dvs) {
+            BytesRef value = dvs.getEncodedValue();
+            if (value == null) return 0;
+            int[] docDims = VectorEncoderDecoder.decodeSparseVectorDims(value);
+            float[] docValues = VectorEncoderDecoder.decodeSparseVector(value);
+            int queryIndex = 0;
+            int docIndex = 0;
+            double l1norm = 0;
+            // Walk both dimension arrays in step. The query dims are sorted by the constructor;
+            // the decoded document dims are assumed to be sorted in the same ascending order.
+            while (queryIndex < queryDims.length && docIndex < docDims.length) {
+                if (queryDims[queryIndex] == docDims[docIndex]) {
+                    l1norm += Math.abs(queryValues[queryIndex] - docValues[docIndex]);
+                    queryIndex++;
+                    docIndex++;
+                } else if (queryDims[queryIndex] > docDims[docIndex]) {
+                    // dimension only in the document vector: the query value is taken as 0
+                    l1norm += Math.abs(docValues[docIndex]);
+                    docIndex++;
+                } else {
+                    // dimension only in the query vector: the document value is taken as 0
+                    l1norm += Math.abs(queryValues[queryIndex]);
+                    queryIndex++;
+                }
+            }
+            // One vector may still have dimensions left; they have no counterpart in the other vector
+            while (queryIndex < queryDims.length) {
+                l1norm += Math.abs(queryValues[queryIndex]);
+                queryIndex++;
+            }
+            while (docIndex < docDims.length) {
+                l1norm += Math.abs(docValues[docIndex]);
+                docIndex++;
+            }
+            return l1norm;
+        }
+    }
+
+    /**
+     * Calculate l2 norm - Euclidean distance
+     * between a query's sparse vector and documents' sparse vectors.
+     * A dimension that is absent from one of the two vectors
+     * is treated as having the value 0 in that vector.
+     *
+     * L2NormSparse is implemented as a class to use
+     * painless script caching to prepare queryVector
+     * only once per script execution for all documents.
+     * A user will call `l2normSparse(params.queryVector, doc['my_vector'])`
+     */
+    public static final class L2NormSparse {
+        final double[] queryValues;
+        final int[] queryDims;
+
+        /**
+         * Prepares queryVector once per script execution.
+         *
+         * @param queryVector a map of dimensions (integers given as strings) to values
+         * @throws IllegalArgumentException if a dimension key cannot be parsed as an integer
+         */
+        public L2NormSparse(Map<String, Number> queryVector) {
+            //break vector into two arrays dims and values
+            int n = queryVector.size();
+            queryDims = new int[n];
+            queryValues = new double[n];
+            int i = 0;
+            for (Map.Entry<String, Number> dimValue : queryVector.entrySet()) {
+                try {
+                    queryDims[i] = Integer.parseInt(dimValue.getKey());
+                } catch (final NumberFormatException e) {
+                    throw new IllegalArgumentException("Failed to parse a query vector dimension, it must be an integer!", e);
+                }
+                queryValues[i] = dimValue.getValue().doubleValue();
+                i++;
+            }
+            // Sort dimensions in the ascending order and sort values in the same order as their corresponding dimensions
+            sortSparseDimsDoubleValues(queryDims, queryValues, n);
+        }
+
+        /**
+         * Computes the l2 norm of the difference between the query vector and the document's vector.
+         *
+         * @param dvs doc values giving access to the document's encoded sparse vector
+         * @return the square root of the summed squared differences over the dimensions of either vector,
+         *         or 0 when the encoded value is null
+         */
+        public double l2normSparse(VectorScriptDocValues.SparseVectorScriptDocValues dvs) {
+            BytesRef value = dvs.getEncodedValue();
+            if (value == null) return 0;
+            int[] docDims = VectorEncoderDecoder.decodeSparseVectorDims(value);
+            float[] docValues = VectorEncoderDecoder.decodeSparseVector(value);
+            int queryIndex = 0;
+            int docIndex = 0;
+            double l2norm = 0;
+            // Walk both dimension arrays in step. The query dims are sorted by the constructor;
+            // the decoded document dims are assumed to be sorted in the same ascending order.
+            while (queryIndex < queryDims.length && docIndex < docDims.length) {
+                if (queryDims[queryIndex] == docDims[docIndex]) {
+                    double diff = queryValues[queryIndex] - docValues[docIndex];
+                    l2norm += diff * diff;
+                    queryIndex++;
+                    docIndex++;
+                } else if (queryDims[queryIndex] > docDims[docIndex]) {
+                    // dimension only in the document vector: the query value is taken as 0
+                    double diff = docValues[docIndex];
+                    l2norm += diff * diff;
+                    docIndex++;
+                } else {
+                    // dimension only in the query vector: the document value is taken as 0
+                    double diff = queryValues[queryIndex];
+                    l2norm += diff * diff;
+                    queryIndex++;
+                }
+            }
+            // One vector may still have dimensions left; they have no counterpart in the other vector
+            while (queryIndex < queryDims.length) {
+                double diff = queryValues[queryIndex];
+                l2norm += diff * diff;
+                queryIndex++;
+            }
+            while (docIndex < docDims.length) {
+                double diff = docValues[docIndex];
+                l2norm += diff * diff;
+                docIndex++;
+            }
+            return Math.sqrt(l2norm);
+        }
+    }
 
     /**
      * Calculate a dot product between a query's sparse vector and documents' sparse vectors

diff --git a/...es/mapper-extras/src/main/resources/org/elasticsearch/index/query/docvalues_whitelist.txt b/...es/mapper-extras/src/main/resources/org/elasticsearch/index/query/docvalues_whitelist.txt
@@ -25,8 +25,12 @@ class org.elasticsearch.index.query.VectorScriptDocValues$SparseVectorScriptDocV
 }
 
 static_import {
+    double l1norm(List, VectorScriptDocValues.DenseVectorScriptDocValues) from_class org.elasticsearch.index.query.ScoreScriptUtils
+    double l2norm(List, VectorScriptDocValues.DenseVectorScriptDocValues) from_class org.elasticsearch.index.query.ScoreScriptUtils
     double cosineSimilarity(List, VectorScriptDocValues.DenseVectorScriptDocValues) bound_to org.elasticsearch.index.query.ScoreScriptUtils$CosineSimilarity
     double dotProduct(List, VectorScriptDocValues.DenseVectorScriptDocValues) from_class org.elasticsearch.index.query.ScoreScriptUtils
+    double l1normSparse(Map, VectorScriptDocValues.SparseVectorScriptDocValues) bound_to org.elasticsearch.index.query.ScoreScriptUtils$L1NormSparse
+    double l2normSparse(Map, VectorScriptDocValues.SparseVectorScriptDocValues) bound_to org.elasticsearch.index.query.ScoreScriptUtils$L2NormSparse
     double dotProductSparse(Map, VectorScriptDocValues.SparseVectorScriptDocValues) bound_to org.elasticsearch.index.query.ScoreScriptUtils$DotProductSparse
     double cosineSimilaritySparse(Map, VectorScriptDocValues.SparseVectorScriptDocValues) bound_to org.elasticsearch.index.query.ScoreScriptUtils$CosineSimilaritySparse
 }
diff --git a/modules/mapper-extras/src/test/java/org/elasticsearch/index/query/ScoreScriptUtilsTests.java b/modules/mapper-extras/src/test/java/org/elasticsearch/index/query/ScoreScriptUtilsTests.java
@@ -25,6 +25,8 @@
 import org.elasticsearch.index.query.ScoreScriptUtils.CosineSimilarity;
 import org.elasticsearch.index.query.ScoreScriptUtils.DotProductSparse;
 import org.elasticsearch.index.query.ScoreScriptUtils.CosineSimilaritySparse;
+import org.elasticsearch.index.query.ScoreScriptUtils.L1NormSparse;
+import org.elasticsearch.index.query.ScoreScriptUtils.L2NormSparse;
 
 import java.util.Arrays;
 import java.util.HashMap;
@@ -33,6 +35,9 @@
 
 import static org.elasticsearch.index.mapper.VectorEncoderDecoderTests.mockEncodeDenseVector;
 import static org.elasticsearch.index.query.ScoreScriptUtils.dotProduct;
+import static org.elasticsearch.index.query.ScoreScriptUtils.l1norm;
+import static org.elasticsearch.index.query.ScoreScriptUtils.l2norm;
+
 import static org.mockito.Mockito.mock;
 import static org.mockito.Mockito.when;
 
@@ -53,6 +58,14 @@ public void testDenseVectorFunctions() {
         CosineSimilarity cosineSimilarity = new CosineSimilarity(queryVector);
         double result2 = cosineSimilarity.cosineSimilarity(dvs);
         assertEquals("cosineSimilarity result is not equal to the expected value!", 0.78, result2, 0.1);
+
+        // test l1Norm
+        double result3 = l1norm(queryVector, dvs);
+        assertEquals("l1norm result is not equal to the expected value!", 485.18, result3, 0.1);
+
+        // test l2norm
+        double result4 = l2norm(queryVector, dvs);
+        assertEquals("l2norm result is not equal to the expected value!", 301.36, result4, 0.1);
     }
 
     public void testSparseVectorFunctions() {
@@ -78,5 +91,15 @@ public void testSparseVectorFunctions() {
         CosineSimilaritySparse cosineSimilaritySparse = new CosineSimilaritySparse(queryVector);
         double result2 = cosineSimilaritySparse.cosineSimilaritySparse(dvs);
         assertEquals("cosineSimilaritySparse result is not equal to the expected value!", 0.78, result2, 0.1);
+
+        // test l1norm
+        L1NormSparse l1Norm = new L1NormSparse(queryVector);
+        double result3 = l1Norm.l1normSparse(dvs);
+        assertEquals("l1normSparse result is not equal to the expected value!", 485.18, result3, 0.1);
+
+        // test l2norm
+        L2NormSparse l2Norm = new L2NormSparse(queryVector);
+        double result4 = l2Norm.l2normSparse(dvs);
+        assertEquals("l2normSparse result is not equal to the expected value!", 301.36, result4, 0.1);
     }
 }