From ce7aee4e8a2b2c3fa5e892b476d33e2dd73440a9 Mon Sep 17 00:00:00 2001 From: Rajesh Balamohan Date: Mon, 20 Jul 2020 15:19:41 +0530 Subject: [PATCH 1/2] HIVE-23870: Optimise multiple text conversions in WritableHiveCharObjectInspector.getPrimitiveJavaObject / HiveCharWritable --- .../hadoop/hive/serde2/io/HiveBaseCharWritable.java | 8 +++++++- .../hadoop/hive/serde2/io/HiveCharWritable.java | 11 ++++++++++- .../hadoop/hive/serde2/io/HiveVarcharWritable.java | 2 ++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveBaseCharWritable.java b/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveBaseCharWritable.java index 5b7b3b4d7965..c4bd6ff93411 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveBaseCharWritable.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveBaseCharWritable.java @@ -27,12 +27,17 @@ public abstract class HiveBaseCharWritable { protected Text value = new Text(); + protected int charLength = -1; public HiveBaseCharWritable() { } public int getCharacterLength() { - return HiveStringUtils.getTextUtfLength(value); + if (charLength != -1) { + return charLength; + } + charLength = HiveStringUtils.getTextUtfLength(value); + return charLength; } /** @@ -45,6 +50,7 @@ public Text getTextValue() { public void readFields(DataInput in) throws IOException { value.readFields(in); + charLength = -1; } public void write(DataOutput out) throws IOException { diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveCharWritable.java b/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveCharWritable.java index 5cc10a8f4781..873007da9d92 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveCharWritable.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveCharWritable.java @@ -53,6 +53,7 @@ public void set(String val) { public void set(HiveCharWritable val) { value.set(val.value); + charLength = -1; } public void set(HiveCharWritable val, int maxLength) { @@ -65,6 +66,7 @@ public void set(HiveChar val, int len) { public void set(String val, int maxLength) { value.set(HiveBaseChar.getPaddedValue(val, maxLength)); + charLength = maxLength; } public HiveChar getHiveChar() { @@ -78,6 +80,9 @@ public void enforceMaxLength(int maxLength) { } public Text getStrippedValue() { + if (value.charAt(value.getLength() - 1) != ' ') { + return value; + } // A lot of these methods could be done more efficiently by operating on the Text value // directly, rather than converting to HiveChar. return new Text(getHiveChar().getStrippedValue()); @@ -88,7 +93,11 @@ public Text getPaddedValue() { } public int getCharacterLength() { - return HiveStringUtils.getTextUtfLength(getStrippedValue()); + if (charLength != -1) { + return charLength; + } + charLength = HiveStringUtils.getTextUtfLength(getStrippedValue()); + return charLength; } public int compareTo(HiveCharWritable rhs) { diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveVarcharWritable.java b/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveVarcharWritable.java index 796c533b2a72..c3812d62e11b 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveVarcharWritable.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveVarcharWritable.java @@ -45,6 +45,7 @@ public void set(String val) { public void set(HiveVarcharWritable val) { value.set(val.value); + charLength = val.charLength; } public void set(HiveVarcharWritable val, int maxLength) { @@ -57,6 +58,7 @@ public void set(HiveVarchar val, int len) { public void set(String val, int maxLength) { value.set(HiveBaseChar.enforceMaxLength(val, maxLength)); + charLength = maxLength; } public HiveVarchar getHiveVarchar() { From c0fd2be8810d4687df63796107780364e1dc3f1a Mon Sep 17 00:00:00 2001 From: Rajesh Balamohan Date: Tue, 21 Jul 2020 13:26:46 +0530 Subject: [PATCH 2/2] HIVE-23870: Optimise multiple text conversions in WritableHiveCharObjectInspector.getPrimitiveJavaObject / HiveCharWritable --- .../java/org/apache/hadoop/hive/serde2/io/HiveCharWritable.java | 1 - 1 file changed, 1 deletion(-) diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveCharWritable.java b/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveCharWritable.java index 873007da9d92..ea3b8e58cebe 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveCharWritable.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/io/HiveCharWritable.java @@ -66,7 +66,6 @@ public void set(HiveChar val, int len) { public void set(String val, int maxLength) { value.set(HiveBaseChar.getPaddedValue(val, maxLength)); - charLength = maxLength; } public HiveChar getHiveChar() {