From 85204b5209e16db698cf6f026c559f9952827afb Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Thu, 9 Aug 2018 14:48:46 +0100 Subject: [PATCH 1/2] Partition-wise maximum scores Added infrastructure to push through the 'person name field value' to the normalizer process. This is required by the normalizer to retrieve the maximum scores for individual partitions. --- .../BucketInfluencerNormalizable.java | 3 +++ .../process/normalizer/BucketNormalizable.java | 5 +++++ .../normalizer/InfluencerNormalizable.java | 5 +++++ .../MultiplyingNormalizerProcess.java | 9 +++++---- .../ml/job/process/normalizer/Normalizable.java | 2 ++ .../ml/job/process/normalizer/Normalizer.java | 2 ++ .../process/normalizer/NormalizerResult.java | 17 ++++++++++++++++- .../normalizer/PartitionScoreNormalizable.java | 5 +++++ .../process/normalizer/RecordNormalizable.java | 6 ++++++ .../BucketInfluencerNormalizableTests.java | 8 ++++++++ .../normalizer/BucketNormalizableTests.java | 4 ++++ .../normalizer/InfluencerNormalizableTests.java | 4 ++++ .../normalizer/NormalizerResultTests.java | 2 ++ 13 files changed, 67 insertions(+), 5 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketInfluencerNormalizable.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketInfluencerNormalizable.java index e55e5ac0346d8..99623ce0945c4 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketInfluencerNormalizable.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketInfluencerNormalizable.java @@ -46,6 +46,9 @@ public String getPersonFieldName() { return bucketInfluencer.getInfluencerFieldName(); } + @Override + public String getPersonFieldValue() { return null; } + @Override public String getFunctionName() { return null; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketNormalizable.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketNormalizable.java index 1ba2e77040897..7ef23cb513b7f 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketNormalizable.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketNormalizable.java @@ -64,6 +64,11 @@ public String getPersonFieldName() { return null; } + @Override + public String getPersonFieldValue() { + return null; + } + @Override public String getFunctionName() { return null; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/InfluencerNormalizable.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/InfluencerNormalizable.java index 74cb86a3fdfdd..bc1567ac00a14 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/InfluencerNormalizable.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/InfluencerNormalizable.java @@ -44,6 +44,11 @@ public String getPersonFieldName() { return influencer.getInfluencerFieldName(); } + @Override + public String getPersonFieldValue() { + return influencer.getInfluencerFieldValue(); + } + @Override public String getFunctionName() { return null; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/MultiplyingNormalizerProcess.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/MultiplyingNormalizerProcess.java index fc7bd35188473..8aa266e15d22e 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/MultiplyingNormalizerProcess.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/MultiplyingNormalizerProcess.java @@ -63,10 +63,11 @@ public void writeRecord(String[] record) throws IOException { result.setPartitionFieldName(record[1]); result.setPartitionFieldValue(record[2]); result.setPersonFieldName(record[3]); - result.setFunctionName(record[4]); - result.setValueFieldName(record[5]); - result.setProbability(Double.parseDouble(record[6])); - result.setNormalizedScore(factor * Double.parseDouble(record[7])); + result.setPersonFieldValue(record[4]); + result.setFunctionName(record[5]); + result.setValueFieldName(record[6]); + result.setProbability(Double.parseDouble(record[7])); + result.setNormalizedScore(factor * Double.parseDouble(record[8])); } catch (NumberFormatException | ArrayIndexOutOfBoundsException e) { throw new IOException("Unable to write to no-op normalizer", e); } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/Normalizable.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/Normalizable.java index 606be98ae10e7..7efadf2961308 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/Normalizable.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/Normalizable.java @@ -44,6 +44,8 @@ public Normalizable(String indexName) { abstract String getPersonFieldName(); + abstract String getPersonFieldValue(); + abstract String getFunctionName(); abstract String getValueFieldName(); diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/Normalizer.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/Normalizer.java index 2c929ff4f1ae4..2d4e2135478f3 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/Normalizer.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/Normalizer.java @@ -70,6 +70,7 @@ public void normalize(Integer bucketSpan, boolean perPartitionNormalization, NormalizerResult.PARTITION_FIELD_NAME_FIELD.getPreferredName(), NormalizerResult.PARTITION_FIELD_VALUE_FIELD.getPreferredName(), NormalizerResult.PERSON_FIELD_NAME_FIELD.getPreferredName(), + NormalizerResult.PERSON_FIELD_VALUE_FIELD.getPreferredName(), NormalizerResult.FUNCTION_NAME_FIELD.getPreferredName(), NormalizerResult.VALUE_FIELD_NAME_FIELD.getPreferredName(), NormalizerResult.PROBABILITY_FIELD.getPreferredName(), @@ -108,6 +109,7 @@ private static void writeNormalizableAndChildrenRecursively(Normalizable normali Strings.coalesceToEmpty(normalizable.getPartitionFieldName()), Strings.coalesceToEmpty(normalizable.getPartitionFieldValue()), Strings.coalesceToEmpty(normalizable.getPersonFieldName()), + Strings.coalesceToEmpty(normalizable.getPersonFieldValue()), Strings.coalesceToEmpty(normalizable.getFunctionName()), Strings.coalesceToEmpty(normalizable.getValueFieldName()), Double.toString(normalizable.getProbability()), diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/NormalizerResult.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/NormalizerResult.java index 5cb2932a28ba5..43f87f33ed7a4 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/NormalizerResult.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/NormalizerResult.java @@ -26,6 +26,7 @@ public class NormalizerResult implements ToXContentObject, Writeable { static final ParseField PARTITION_FIELD_NAME_FIELD = new ParseField("partition_field_name"); static final ParseField PARTITION_FIELD_VALUE_FIELD = new ParseField("partition_field_value"); static final ParseField PERSON_FIELD_NAME_FIELD = new ParseField("person_field_name"); + static final ParseField PERSON_FIELD_VALUE_FIELD = new ParseField("person_field_value"); static final ParseField FUNCTION_NAME_FIELD = new ParseField("function_name"); static final ParseField VALUE_FIELD_NAME_FIELD = new ParseField("value_field_name"); static final ParseField PROBABILITY_FIELD = new ParseField("probability"); @@ -39,6 +40,7 @@ public class NormalizerResult implements ToXContentObject, Writeable { PARSER.declareString(NormalizerResult::setPartitionFieldName, PARTITION_FIELD_NAME_FIELD); PARSER.declareString(NormalizerResult::setPartitionFieldValue, PARTITION_FIELD_VALUE_FIELD); PARSER.declareString(NormalizerResult::setPersonFieldName, PERSON_FIELD_NAME_FIELD); + PARSER.declareString(NormalizerResult::setPersonFieldValue, PERSON_FIELD_VALUE_FIELD); PARSER.declareString(NormalizerResult::setFunctionName, FUNCTION_NAME_FIELD); PARSER.declareString(NormalizerResult::setValueFieldName, VALUE_FIELD_NAME_FIELD); PARSER.declareDouble(NormalizerResult::setProbability, PROBABILITY_FIELD); @@ -49,6 +51,7 @@ public class NormalizerResult implements ToXContentObject, Writeable { private String partitionFieldName; private String partitionFieldValue; private String personFieldName; + private String personFieldValue; private String functionName; private String valueFieldName; private double probability; @@ -62,6 +65,7 @@ public NormalizerResult(StreamInput in) throws IOException { partitionFieldName = in.readOptionalString(); partitionFieldValue = in.readOptionalString(); personFieldName = in.readOptionalString(); + personFieldValue = in.readOptionalString(); functionName = in.readOptionalString(); valueFieldName = in.readOptionalString(); probability = in.readDouble(); @@ -74,6 +78,7 @@ public void writeTo(StreamOutput out) throws IOException { out.writeOptionalString(partitionFieldName); out.writeOptionalString(partitionFieldValue); out.writeOptionalString(personFieldName); + out.writeOptionalString(personFieldValue); out.writeOptionalString(functionName); out.writeOptionalString(valueFieldName); out.writeDouble(probability); @@ -87,6 +92,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(PARTITION_FIELD_NAME_FIELD.getPreferredName(), partitionFieldName); builder.field(PARTITION_FIELD_VALUE_FIELD.getPreferredName(), partitionFieldValue); builder.field(PERSON_FIELD_NAME_FIELD.getPreferredName(), personFieldName); + builder.field(PERSON_FIELD_VALUE_FIELD.getPreferredName(), personFieldValue); builder.field(FUNCTION_NAME_FIELD.getPreferredName(), functionName); builder.field(VALUE_FIELD_NAME_FIELD.getPreferredName(), valueFieldName); builder.field(PROBABILITY_FIELD.getPreferredName(), probability); @@ -127,6 +133,14 @@ public void setPersonFieldName(String personFieldName) { this.personFieldName = personFieldName; } + public String getPersonFieldValue() { + return personFieldValue; + } + + public void setPersonFieldValue(String personFieldValue) { + this.personFieldValue = personFieldValue; + } + public String getFunctionName() { return functionName; } @@ -161,7 +175,7 @@ public void setNormalizedScore(double normalizedScore) { @Override public int hashCode() { - return Objects.hash(level, partitionFieldName, partitionFieldValue, personFieldName, + return Objects.hash(level, partitionFieldName, partitionFieldValue, personFieldName, personFieldValue, functionName, valueFieldName, probability, normalizedScore); } @@ -184,6 +198,7 @@ public boolean equals(Object other) { && Objects.equals(this.partitionFieldName, that.partitionFieldName) && Objects.equals(this.partitionFieldValue, that.partitionFieldValue) && Objects.equals(this.personFieldName, that.personFieldName) + && Objects.equals(this.personFieldValue, that.personFieldValue) && Objects.equals(this.functionName, that.functionName) && Objects.equals(this.valueFieldName, that.valueFieldName) && this.probability == that.probability diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/PartitionScoreNormalizable.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/PartitionScoreNormalizable.java index 4d5d91aa12f8c..91b2a7a505e35 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/PartitionScoreNormalizable.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/PartitionScoreNormalizable.java @@ -45,6 +45,11 @@ public String getPersonFieldName() { return null; } + @Override + public String getPersonFieldValue() { + return null; + } + @Override public String getFunctionName() { return null; diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/RecordNormalizable.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/RecordNormalizable.java index 97114130c84d9..f3f32cb526ead 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/RecordNormalizable.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/RecordNormalizable.java @@ -46,6 +46,12 @@ public String getPersonFieldName() { return over != null ? over : record.getByFieldName(); } + @Override + public String getPersonFieldValue() { + String over = record.getOverFieldValue(); + return over != null ? over : record.getByFieldValue(); + } + @Override public String getFunctionName() { return record.getFunction(); diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketInfluencerNormalizableTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketInfluencerNormalizableTests.java index f83d51d84b009..bde5c3e44f9b4 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketInfluencerNormalizableTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketInfluencerNormalizableTests.java @@ -43,10 +43,18 @@ public void testGetPartitionFieldName() { assertNull(new BucketInfluencerNormalizable(bucketInfluencer, INDEX_NAME).getPartitionFieldName()); } + public void testGetPartitionFieldValue() { + assertNull(new BucketInfluencerNormalizable(bucketInfluencer, INDEX_NAME).getPartitionFieldValue()); + } + public void testGetPersonFieldName() { assertEquals("airline", new BucketInfluencerNormalizable(bucketInfluencer, INDEX_NAME).getPersonFieldName()); } + public void testGetPersonFieldValue() { + assertNull(new BucketInfluencerNormalizable(bucketInfluencer, INDEX_NAME).getPersonFieldValue()); + } + public void testGetFunctionName() { assertNull(new BucketInfluencerNormalizable(bucketInfluencer, INDEX_NAME).getFunctionName()); } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketNormalizableTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketNormalizableTests.java index 630bffe11129a..4436fcc7026fe 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketNormalizableTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketNormalizableTests.java @@ -73,6 +73,10 @@ public void testGetPersonFieldName() { assertNull(new BucketNormalizable(bucket, INDEX_NAME).getPersonFieldName()); } + public void testGetPersonFieldValue() { + assertNull(new BucketNormalizable(bucket, INDEX_NAME).getPersonFieldValue()); + } + public void testGetFunctionName() { assertNull(new BucketNormalizable(bucket, INDEX_NAME).getFunctionName()); } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/InfluencerNormalizableTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/InfluencerNormalizableTests.java index 215f88ad33224..ee5518b9c12d5 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/InfluencerNormalizableTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/InfluencerNormalizableTests.java @@ -44,6 +44,10 @@ public void testGetPersonFieldName() { assertEquals("airline", new InfluencerNormalizable(influencer, INDEX_NAME).getPersonFieldName()); } + public void testGetPersonFieldValue() { + assertEquals("AAL", new InfluencerNormalizable(influencer, INDEX_NAME).getPersonFieldValue()); + } + public void testGetFunctionName() { assertNull(new InfluencerNormalizable(influencer, INDEX_NAME).getFunctionName()); } diff --git a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/NormalizerResultTests.java b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/NormalizerResultTests.java index ecaea449f95c1..af35c01aa871d 100644 --- a/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/NormalizerResultTests.java +++ b/x-pack/plugin/ml/src/test/java/org/elasticsearch/xpack/ml/job/process/normalizer/NormalizerResultTests.java @@ -19,6 +19,7 @@ public void testDefaultConstructor() { assertNull(msg.getPartitionFieldName()); assertNull(msg.getPartitionFieldValue()); assertNull(msg.getPersonFieldName()); + assertNull(msg.getPersonFieldValue()); assertNull(msg.getFunctionName()); assertNull(msg.getValueFieldName()); assertEquals(0.0, msg.getProbability(), EPSILON); @@ -32,6 +33,7 @@ protected NormalizerResult createTestInstance() { msg.setPartitionFieldName("part"); msg.setPartitionFieldValue("something"); msg.setPersonFieldName("person"); + msg.setPersonFieldValue("fred"); msg.setFunctionName("mean"); msg.setValueFieldName("value"); msg.setProbability(0.005); From a1ebc3354647a37cc5a84aa912936e26978a7ddf Mon Sep 17 00:00:00 2001 From: Ed Savage Date: Fri, 10 Aug 2018 14:08:26 +0100 Subject: [PATCH 2/2] [ML] Addressed @droberts code review comments Ensuring that changes are fully backwards compatible --- .../process/normalizer/BucketInfluencerNormalizable.java | 4 +++- .../ml/job/process/normalizer/NormalizerResult.java | 9 +++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketInfluencerNormalizable.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketInfluencerNormalizable.java index 99623ce0945c4..8ee5d1ad6e2d3 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketInfluencerNormalizable.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/BucketInfluencerNormalizable.java @@ -47,7 +47,9 @@ public String getPersonFieldName() { } @Override - public String getPersonFieldValue() { return null; } + public String getPersonFieldValue() { + return null; + } @Override public String getFunctionName() { diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/NormalizerResult.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/NormalizerResult.java index 43f87f33ed7a4..269792dbe7797 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/NormalizerResult.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/normalizer/NormalizerResult.java @@ -5,6 +5,7 @@ */ package org.elasticsearch.xpack.ml.job.process.normalizer; +import org.elasticsearch.Version; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -65,7 +66,9 @@ public NormalizerResult(StreamInput in) throws IOException { partitionFieldName = in.readOptionalString(); partitionFieldValue = in.readOptionalString(); personFieldName = in.readOptionalString(); - personFieldValue = in.readOptionalString(); + if (in.getVersion().onOrAfter(Version.V_6_5_0)) { + personFieldValue = in.readOptionalString(); + } functionName = in.readOptionalString(); valueFieldName = in.readOptionalString(); probability = in.readDouble(); @@ -78,7 +81,9 @@ public void writeTo(StreamOutput out) throws IOException { out.writeOptionalString(partitionFieldName); out.writeOptionalString(partitionFieldValue); out.writeOptionalString(personFieldName); - out.writeOptionalString(personFieldValue); + if (out.getVersion().onOrAfter(Version.V_6_5_0)) { + out.writeOptionalString(personFieldValue); + } out.writeOptionalString(functionName); out.writeOptionalString(valueFieldName); out.writeDouble(probability);