From 1b9b2e73b8637b8da15b790a06b08e8810fc75d9 Mon Sep 17 00:00:00 2001
From: xuejianbest <384329882@qq.com>
Date: Thu, 9 Aug 2018 11:11:04 +0800
Subject: [PATCH 01/10] Fix the show method to display the wide character
 alignment problem
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

before:
+---+---------------------------+-------------+
|id |中国                         |s2           |
+---+---------------------------+-------------+
|1  |ab                         |[a]          |
|2  |null                       |[中国, abc]   |
|3  |ab1                        |[hello world]|
|4  |か行 きゃ(kya) きゅ(kyu) きょ(kyo) |[“中国]        |
|5  |中国(你好)a                   |[“中(国), 312] |
|6  |中国山(东)服务区                  |[“中(国)]     |
|7  |中国山东服务区                    |[中(国)]      |
|8  |                           |[中国]         |
+---+---------------------------+-------------+

after:
+---+--------------------------+----------------+
| id|                      中国|              s2|
+---+--------------------------+----------------+
|  1|                        ab|             [a]|
|  2|                      null|     [中国, abc]|
|  3|                       ab1|   [hello world]|
|  4|か行 きゃ(kya) きゅ(kyu...|         [“中国]|
|  5|              中国(你好)a|  [“中(国), 312]|
|  6|         中国山(东)服务区|      [“中(国)]|
|  7|            中国山东服务区|       [中(国)]|
|  8|                          |          [中国]|
+---+--------------------------+----------------+
---
 .../scala/org/apache/spark/sql/Dataset.scala | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 0aee1d7be578..6af76ca81127 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -280,6 +280,7 @@ class Dataset[T] private[sql](
 
     // We set a minimum column width at '3'
     val minimumColWidth = 3
+    val regex = """[^\x00-\xff]""".r
     if (!vertical) {
       // Initialise the width of each column to a minimum value
       val colWidths = Array.fill(numCols)(minimumColWidth)
@@ -287,7 +288,7 @@ class Dataset[T] private[sql](
 
       // Compute the width of each column
      for (row <- rows) {
        for ((cell, i) <- row.zipWithIndex) {
-          colWidths(i) = math.max(colWidths(i), cell.length)
+          colWidths(i) = math.max(colWidths(i), cell.length + regex.findAllIn(cell).size)
        }
      }
@@ -297,9 +298,9 @@ class Dataset[T] private[sql](
 
      // column names
      rows.head.zipWithIndex.map { case (cell, i) =>
        if (truncate > 0) {
-          StringUtils.leftPad(cell, colWidths(i))
+          StringUtils.leftPad(cell, colWidths(i) - regex.findAllIn(cell).size)
        } else {
-          StringUtils.rightPad(cell, colWidths(i))
+          StringUtils.rightPad(cell, colWidths(i) - regex.findAllIn(cell).size)
        }
      }.addString(sb, "|", "|", "|\n")
@@ -309,9 +310,9 @@ class Dataset[T] private[sql](
      rows.tail.foreach {
        _.zipWithIndex.map { case (cell, i) =>
          if (truncate > 0) {
-            StringUtils.leftPad(cell.toString, colWidths(i))
+            StringUtils.leftPad(cell.toString, colWidths(i) - regex.findAllIn(cell).size)
          } else {
-            StringUtils.rightPad(cell.toString, colWidths(i))
+            StringUtils.rightPad(cell.toString, colWidths(i) - regex.findAllIn(cell).size)
          }
        }.addString(sb, "|", "|", "|\n")
      }
@@ -324,12 +325,10 @@ class Dataset[T] private[sql](
 
      // Compute the width of field name and data columns
      val fieldNameColWidth = fieldNames.foldLeft(minimumColWidth) { case (curMax, fieldName) =>
-        math.max(curMax, fieldName.length)
+        math.max(curMax, fieldName.length + regex.findAllIn(fieldName).size)
      }
      val dataColWidth = dataRows.foldLeft(minimumColWidth) { case (curMax, row) =>
-        math.max(curMax, row.map(_.length).reduceLeftOption[Int] { case (cellMax, cell) =>
-          math.max(cellMax, cell)
-        }.getOrElse(0))
+        math.max(curMax, row.map(cell => cell.length + regex.findAllIn(cell).size).max)
      }

      dataRows.zipWithIndex.foreach { case (row, i) =>
@@ -338,8 +337,8 @@ class Dataset[T] private[sql](
          s"-RECORD $i", fieldNameColWidth + dataColWidth + 5, "-")
        sb.append(rowHeader).append("\n")
        row.zipWithIndex.map { case (cell, j) =>
-          val fieldName = StringUtils.rightPad(fieldNames(j), fieldNameColWidth)
-          val data = StringUtils.rightPad(cell, dataColWidth)
+          val fieldName = StringUtils.rightPad(fieldNames(j), fieldNameColWidth - regex.findAllIn(fieldNames(j)).size)
+          val data = StringUtils.rightPad(cell, dataColWidth - regex.findAllIn(cell).size)
          s" $fieldName | $data "
        }.addString(sb, "", "\n", "\n")
      }

From 906c0ba736552ae83c1fa3be1b501e9dbb61c5b1 Mon Sep 17 00:00:00 2001
From: xuejianbest <384329882@qq.com>
Date: Fri, 10 Aug 2018 17:38:31 +0800
Subject: [PATCH 02/10] Modifying regular expressions for matching narrow
 characters

---
 sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 9f555fd0abfa..40459d4c46ba 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -294,7 +294,7 @@ class Dataset[T] private[sql](
 
     // We set a minimum column width at '3'
     val minimumColWidth = 3
-    val regex = """[^\x00-\xff“”]""".r
+    val regex = """[^\x00-\u2e39]""".r
     if (!vertical) {
       // Initialise the width of each column to a minimum value
       val colWidths = Array.fill(numCols)(minimumColWidth)

From da37d2ef68c0212b723dd96b4ae571f6a16f03ad Mon Sep 17 00:00:00 2001
From: xuejianbest <384329882@qq.com>
Date: Tue, 28 Aug 2018 17:05:14 +0800
Subject: [PATCH 03/10] Fix the show method to display full width characters.

Modify regular expressions to make them more precise.
Modify variable names and add comments.
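
For illustration, the core idea of this change in isolation (a standalone
sketch, not the patched Spark code itself): every character matched by the
full width regex occupies two terminal columns, so the display width of a
cell is its character count plus the number of full width matches.

    // Sketch: measure display width the way this patch does.
    val fullWidthRegex =
      ("""[\u1100-\u115F\u2E80-\uA4CF\uAC00-\uD7A3\uF900-\uFAFF""" +
        """\uFE10-\uFE19\uFE30-\uFE6F\uFF00-\uFF60\uFFE0-\uFFE6]""").r

    def displayWidth(s: String): Int = s.length + fullWidthRegex.findAllIn(s).size

    displayWidth("ab")    // 2 -> half width characters only
    displayWidth("中国")  // 4 -> two full width characters, two columns each
    displayWidth("中a")   // 3 -> one full width plus one half width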
---
 .../scala/org/apache/spark/sql/Dataset.scala | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 40459d4c46ba..bdd2bec26ef0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -294,7 +294,8 @@ class Dataset[T] private[sql](
 
     // We set a minimum column width at '3'
     val minimumColWidth = 3
-    val regex = """[^\x00-\u2e39]""".r
+    //Regular expression matching full width characters
+    val fullWidthRegex = """[\u1100-\u115F\u2E80-\uA4CF\uAC00-\uD7A3\uF900-\uFAFF\uFE10-\uFE19\uFE30-\uFE6F\uFF00-\uFF60\uFFE0-\uFFE6]""".r
     if (!vertical) {
       // Initialise the width of each column to a minimum value
       val colWidths = Array.fill(numCols)(minimumColWidth)
@@ -302,16 +303,16 @@ class Dataset[T] private[sql](
      // Compute the width of each column
      for (row <- rows) {
        for ((cell, i) <- row.zipWithIndex) {
-          colWidths(i) = math.max(colWidths(i), cell.length + regex.findAllIn(cell).size)
+          colWidths(i) = math.max(colWidths(i), cell.length + fullWidthRegex.findAllIn(cell).size)
        }
      }

      val paddedRows = rows.map { row =>
        row.zipWithIndex.map { case (cell, i) =>
          if (truncate > 0) {
-            StringUtils.leftPad(cell, colWidths(i) - regex.findAllIn(cell).size)
+            StringUtils.leftPad(cell, colWidths(i) - fullWidthRegex.findAllIn(cell).size)
          } else {
-            StringUtils.rightPad(cell, colWidths(i) - regex.findAllIn(cell).size)
+            StringUtils.rightPad(cell, colWidths(i) - fullWidthRegex.findAllIn(cell).size)
          }
        }
      }
@@ -333,10 +334,10 @@ class Dataset[T] private[sql](
 
      // Compute the width of field name and data columns
      val fieldNameColWidth = fieldNames.foldLeft(minimumColWidth) { case (curMax, fieldName) =>
-        math.max(curMax, fieldName.length + regex.findAllIn(fieldName).size)
+        math.max(curMax, fieldName.length + fullWidthRegex.findAllIn(fieldName).size)
      }
      val dataColWidth = dataRows.foldLeft(minimumColWidth) { case (curMax, row) =>
-        math.max(curMax, row.map(cell => cell.length + regex.findAllIn(cell).size).max)
+        math.max(curMax, row.map(cell => cell.length + fullWidthRegex.findAllIn(cell).size).max)
      }

      dataRows.zipWithIndex.foreach { case (row, i) =>
@@ -345,8 +346,8 @@ class Dataset[T] private[sql](
          s"-RECORD $i", fieldNameColWidth + dataColWidth + 5, "-")
        sb.append(rowHeader).append("\n")
        row.zipWithIndex.map { case (cell, j) =>
-          val fieldName = StringUtils.rightPad(fieldNames(j), fieldNameColWidth - regex.findAllIn(fieldNames(j)).size)
-          val data = StringUtils.rightPad(cell, dataColWidth - regex.findAllIn(cell).size)
+          val fieldName = StringUtils.rightPad(fieldNames(j), fieldNameColWidth - fullWidthRegex.findAllIn(fieldNames(j)).size)
+          val data = StringUtils.rightPad(cell, dataColWidth - fullWidthRegex.findAllIn(cell).size)
          s" $fieldName | $data "
        }.addString(sb, "", "\n", "\n")
      }

From 8737671db590e82118aa729c690f51b7af8d5674 Mon Sep 17 00:00:00 2001
From: xuejianbest <384329882@qq.com>
Date: Wed, 29 Aug 2018 11:42:37 +0800
Subject: [PATCH 04/10] Write a utility method to get the number of half
 widths of a string

`fullWidthRegex` is defined outside `stringHalfWidth` to avoid recompiling
the regular expression on every call: compilation is expensive, so
frequently used regexes should be constructed once.
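
The difference this avoids looks roughly like the following (an
illustrative sketch with made-up object names, not the patched code):

    import scala.util.matching.Regex

    object Hoisted {
      // The pattern is compiled once, when the object is initialized.
      private val fullWidthRegex: Regex = """[\u1100-\u115F]""".r

      def halfWidths(s: String): Int = s.length + fullWidthRegex.findAllIn(s).size
    }

    object PerCall {
      def halfWidths(s: String): Int = {
        // Recompiles the underlying java.util.regex.Pattern on every call.
        val r: Regex = """[\u1100-\u115F]""".r
        s.length + r.findAllIn(s).size
      }
    }

Both objects return the same counts; only Hoisted pays the compilation
cost once instead of per call.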
---
 .../scala/org/apache/spark/sql/Dataset.scala | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index bdd2bec26ef0..7382e2af3929 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -294,8 +294,12 @@ class Dataset[T] private[sql](
 
     // We set a minimum column width at '3'
     val minimumColWidth = 3
-    //Regular expression matching full width characters
+    // Regular expression matching full width characters
     val fullWidthRegex = """[\u1100-\u115F\u2E80-\uA4CF\uAC00-\uD7A3\uF900-\uFAFF\uFE10-\uFE19\uFE30-\uFE6F\uFF00-\uFF60\uFFE0-\uFFE6]""".r
+    // The number of half width of a string
+    def stringHalfWidth = (str: String) => {
+      str.length + fullWidthRegex.findAllIn(str).size
+    }
     if (!vertical) {
       // Initialise the width of each column to a minimum value
       val colWidths = Array.fill(numCols)(minimumColWidth)
@@ -303,16 +307,16 @@ class Dataset[T] private[sql](
      // Compute the width of each column
      for (row <- rows) {
        for ((cell, i) <- row.zipWithIndex) {
-          colWidths(i) = math.max(colWidths(i), cell.length + fullWidthRegex.findAllIn(cell).size)
+          colWidths(i) = math.max(colWidths(i), stringHalfWidth(cell))
        }
      }

      val paddedRows = rows.map { row =>
        row.zipWithIndex.map { case (cell, i) =>
          if (truncate > 0) {
-            StringUtils.leftPad(cell, colWidths(i) - fullWidthRegex.findAllIn(cell).size)
+            " " * (colWidths(i) - stringHalfWidth(cell)) + cell
          } else {
-            StringUtils.rightPad(cell, colWidths(i) - fullWidthRegex.findAllIn(cell).size)
+            cell + " " * (colWidths(i) - stringHalfWidth(cell))
          }
        }
      }
@@ -334,10 +338,10 @@ class Dataset[T] private[sql](
 
      // Compute the width of field name and data columns
      val fieldNameColWidth = fieldNames.foldLeft(minimumColWidth) { case (curMax, fieldName) =>
-        math.max(curMax, fieldName.length + fullWidthRegex.findAllIn(fieldName).size)
+        math.max(curMax, stringHalfWidth(fieldName))
      }
      val dataColWidth = dataRows.foldLeft(minimumColWidth) { case (curMax, row) =>
-        math.max(curMax, row.map(cell => cell.length + fullWidthRegex.findAllIn(cell).size).max)
+        math.max(curMax, row.map(cell => stringHalfWidth(cell)).max)
      }

      dataRows.zipWithIndex.foreach { case (row, i) =>
@@ -346,8 +350,8 @@ class Dataset[T] private[sql](
          s"-RECORD $i", fieldNameColWidth + dataColWidth + 5, "-")
        sb.append(rowHeader).append("\n")
        row.zipWithIndex.map { case (cell, j) =>
-          val fieldName = StringUtils.rightPad(fieldNames(j), fieldNameColWidth - fullWidthRegex.findAllIn(fieldNames(j)).size)
-          val data = StringUtils.rightPad(cell, dataColWidth - fullWidthRegex.findAllIn(cell).size)
+          val fieldName = fieldNames(j) + " " * (fieldNameColWidth - stringHalfWidth(fieldNames(j)))
+          val data = cell + " " * (dataColWidth - stringHalfWidth(cell))
          s" $fieldName | $data "
        }.addString(sb, "", "\n", "\n")
      }

From 697ac047d85b5579c25fbd5f5e25d099562d20fc Mon Sep 17 00:00:00 2001
From: xuejianbest <384329882@qq.com>
Date: Wed, 29 Aug 2018 17:27:14 +0800
Subject: [PATCH 05/10] Move the stringHalfWidth method into util.Utils

Move the stringHalfWidth method into util.Utils and add tests for it.
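
From the caller's side the utility then reads as below (a hypothetical
REPL-style sketch; Utils is private[spark], so the real call sites live
inside Spark itself, e.g. Dataset.scala):

    import org.apache.spark.util.Utils

    Utils.stringHalfWidth("hello")  // 5 -> all half width characters
    Utils.stringHalfWidth("中国")   // 4 -> each full width character counts twice
    Utils.stringHalfWidth(null)     // 0 -> the method is null-safe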
---
 .../scala/org/apache/spark/util/Utils.scala   | 21 +++++++++++++++++++
 .../org/apache/spark/util/UtilsSuite.scala    | 19 +++++++++++++++++
 .../scala/org/apache/spark/sql/Dataset.scala  | 20 +++++-------------
 3 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 7ec707d94ed8..8e0a7d91d7b3 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -2794,6 +2794,27 @@ private[spark] object Utils extends Logging {
       }
     }
   }
+
+  /**
+   * Regular expression matching full width characters
+   */
+  private lazy val fullWidthRegex = ("""[""" +
+    """\u1100-\u115F""" +
+    """\u2E80-\uA4CF""" +
+    """\uAC00-\uD7A3""" +
+    """\uF900-\uFAFF""" +
+    """\uFE10-\uFE19""" +
+    """\uFE30-\uFE6F""" +
+    """\uFF00-\uFF60""" +
+    """\uFFE0-\uFFE6""" +
+    """]""").r
+  /**
+   * Return the number of half width of a string
+   * A full width character occupies two half widths
+   */
+  def stringHalfWidth(str: String): Int = {
+    if(str == null) 0 else str.length + fullWidthRegex.findAllIn(str).size
+  }
 }

 private[util] object CallerContext extends Logging {

diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
index 418d2f9b8850..ac907c0dc1b1 100644
--- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
@@ -1184,6 +1184,25 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging {
     assert(Utils.getSimpleName(classOf[MalformedClassObject.MalformedClass]) ===
       "UtilsSuite$MalformedClassObject$MalformedClass")
   }
+
+  test("stringHalfWidth") {
+    assert(Utils.stringHalfWidth(null) == 0)
+    assert(Utils.stringHalfWidth("") == 0)
+    assert(Utils.stringHalfWidth("ab c") == 4)
+    assert(Utils.stringHalfWidth("1098") == 4)
+    assert(Utils.stringHalfWidth("mø") == 2)
+    assert(Utils.stringHalfWidth("γύρ") == 3)
+    assert(Utils.stringHalfWidth("pê") == 2)
+    assert(Utils.stringHalfWidth("ー") == 2)
+    assert(Utils.stringHalfWidth("测") == 2)
+    assert(Utils.stringHalfWidth("か") == 2)
+    assert(Utils.stringHalfWidth("걸") == 2)
+    assert(Utils.stringHalfWidth("à") == 1)
+    assert(Utils.stringHalfWidth("焼") == 2)
+    assert(Utils.stringHalfWidth("羍む") == 4)
+    assert(Utils.stringHalfWidth("뺭ᾘ") == 3)
+    assert(Utils.stringHalfWidth("\u0967\u0968\u0969") == 3)
+  }
 }

 private class SimpleExtension

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index 7382e2af3929..a884a567812d 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -294,12 +294,6 @@ class Dataset[T] private[sql](
 
     // We set a minimum column width at '3'
     val minimumColWidth = 3
-    // Regular expression matching full width characters
-    val fullWidthRegex = """[\u1100-\u115F\u2E80-\uA4CF\uAC00-\uD7A3\uF900-\uFAFF\uFE10-\uFE19\uFE30-\uFE6F\uFF00-\uFF60\uFFE0-\uFFE6]""".r
-    // The number of half width of a string
-    def stringHalfWidth = (str: String) => {
-      str.length + fullWidthRegex.findAllIn(str).size
-    }
     if (!vertical) {
       // Initialise the width of each column to a minimum value
       val colWidths = Array.fill(numCols)(minimumColWidth)
@@ -307,16 +301,16 @@ class Dataset[T] private[sql](
      // Compute the width of each column
      for (row <- rows) {
        for ((cell, i) <- row.zipWithIndex) {
-          colWidths(i) = math.max(colWidths(i), stringHalfWidth(cell))
+          colWidths(i) = math.max(colWidths(i), Utils.stringHalfWidth(cell))
        }
      }

      val paddedRows = rows.map { row =>
        row.zipWithIndex.map { case (cell, i) =>
          if (truncate > 0) {
-            " " * (colWidths(i) - stringHalfWidth(cell)) + cell
+            " " * (colWidths(i) - Utils.stringHalfWidth(cell)) + cell
          } else {
-            cell + " " * (colWidths(i) - stringHalfWidth(cell))
+            cell + " " * (colWidths(i) - Utils.stringHalfWidth(cell))
          }
        }
      }
@@ -338,10 +332,10 @@ class Dataset[T] private[sql](
 
      // Compute the width of field name and data columns
      val fieldNameColWidth = fieldNames.foldLeft(minimumColWidth) { case (curMax, fieldName) =>
-        math.max(curMax, stringHalfWidth(fieldName))
+        math.max(curMax, Utils.stringHalfWidth(fieldName))
      }
      val dataColWidth = dataRows.foldLeft(minimumColWidth) { case (curMax, row) =>
-        math.max(curMax, row.map(cell => stringHalfWidth(cell)).max)
+        math.max(curMax, row.map(cell => Utils.stringHalfWidth(cell)).max)
      }

      dataRows.zipWithIndex.foreach { case (row, i) =>
@@ -350,8 +344,8 @@ class Dataset[T] private[sql](
          s"-RECORD $i", fieldNameColWidth + dataColWidth + 5, "-")
        sb.append(rowHeader).append("\n")
        row.zipWithIndex.map { case (cell, j) =>
-          val fieldName = fieldNames(j) + " " * (fieldNameColWidth - stringHalfWidth(fieldNames(j)))
-          val data = cell + " " * (dataColWidth - stringHalfWidth(cell))
+          val fieldName = fieldNames(j) + " " * (fieldNameColWidth - Utils.stringHalfWidth(fieldNames(j)))
+          val data = cell + " " * (dataColWidth - Utils.stringHalfWidth(cell))
          s" $fieldName | $data "
        }.addString(sb, "", "\n", "\n")
      }

From 3d65e6b9ffb35a6c6b38313768c35a7f08e4e2db Mon Sep 17 00:00:00 2001
From: xuejianbest <384329882@qq.com>
Date: Thu, 30 Aug 2018 10:14:07 +0800
Subject: [PATCH 06/10] Formatted code and non-functional changes

---
 core/src/main/scala/org/apache/spark/util/Utils.scala  | 9 +++++----
 .../src/main/scala/org/apache/spark/sql/Dataset.scala | 10 ++++++----
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 8e0a7d91d7b3..89fd746b4243 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -2798,7 +2798,7 @@ private[spark] object Utils extends Logging {
   /**
    * Regular expression matching full width characters
    */
-  private lazy val fullWidthRegex = ("""[""" +
+  private val fullWidthRegex = ("""[""" +
     """\u1100-\u115F""" +
     """\u2E80-\uA4CF""" +
     """\uAC00-\uD7A3""" +
@@ -2808,12 +2808,13 @@ private[spark] object Utils extends Logging {
     """\uFF00-\uFF60""" +
     """\uFFE0-\uFFE6""" +
     """]""").r
+
   /**
-   * Return the number of half width of a string
-   * A full width character occupies two half widths
+   * Return the number of half widths in a given string. Note that a full width character
+   * occupies two half widths.
    */
   def stringHalfWidth(str: String): Int = {
-    if(str == null) 0 else str.length + fullWidthRegex.findAllIn(str).size
+    if (str == null) 0 else str.length + fullWidthRegex.findAllIn(str).size
   }
 }

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
index a884a567812d..01a11c306c0f 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala
@@ -308,9 +308,9 @@ class Dataset[T] private[sql](
      val paddedRows = rows.map { row =>
        row.zipWithIndex.map { case (cell, i) =>
          if (truncate > 0) {
-            " " * (colWidths(i) - Utils.stringHalfWidth(cell)) + cell
+            StringUtils.leftPad(cell, colWidths(i) - Utils.stringHalfWidth(cell) + cell.length)
          } else {
-            cell + " " * (colWidths(i) - Utils.stringHalfWidth(cell))
+            StringUtils.rightPad(cell, colWidths(i) - Utils.stringHalfWidth(cell) + cell.length)
          }
        }
      }
@@ -344,8 +344,10 @@ class Dataset[T] private[sql](
          s"-RECORD $i", fieldNameColWidth + dataColWidth + 5, "-")
        sb.append(rowHeader).append("\n")
        row.zipWithIndex.map { case (cell, j) =>
-          val fieldName = fieldNames(j) + " " * (fieldNameColWidth - Utils.stringHalfWidth(fieldNames(j)))
-          val data = cell + " " * (dataColWidth - Utils.stringHalfWidth(cell))
+          val fieldName = StringUtils.rightPad(fieldNames(j),
+            fieldNameColWidth - Utils.stringHalfWidth(fieldNames(j)) + fieldNames(j).length)
+          val data = StringUtils.rightPad(cell,
+            dataColWidth - Utils.stringHalfWidth(cell) + cell.length)
          s" $fieldName | $data "
        }.addString(sb, "", "\n", "\n")
      }

From 363de6ba0f769d31ba179af6a1600bb29f5ba8ef Mon Sep 17 00:00:00 2001
From: xuejianbest <384329882@qq.com>
Date: Thu, 30 Aug 2018 12:09:16 +0800
Subject: [PATCH 07/10] Add tests in DatasetSuite

---
 .../org/apache/spark/sql/DatasetSuite.scala | 47 +++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
index cf24eba12801..f58a41f25e53 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -969,6 +969,53 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
     checkShowString(ds, expected)
   }
 
+  test("SPARK-25108 Fix the show method to display the full width character alignment problem") {
+    val df = Seq(
+      (0, null, 1),
+      (0, "", 1),
+      (0, "ab c", 1),
+      (0, "1098", 1),
+      (0, "mø", 1),
+      (0, "γύρ", 1),
+      (0, "pê", 1),
+      (0, "ー", 1),
+      (0, "测", 1),
+      (0, "か", 1),
+      (0, "걸", 1),
+      (0, "à", 1),
+      (0, "焼", 1),
+      (0, "羍む", 1),
+      (0, "뺭ᾘ", 1),
+      (0, "\u0967\u0968\u0969", 1)
+    ).toDF("b", "a", "c")
+
+    val ds = df.as[ClassData]
+    val expected =
+      """+---+----+---+
+        ||  b|   a|  c|
+        |+---+----+---+
+        ||  0|null|  1|
+        ||  0|    |  1|
+        ||  0|ab c|  1|
+        ||  0|1098|  1|
+        ||  0|  mø|  1|
+        ||  0| γύρ|  1|
+        ||  0|  pê|  1|
+        ||  0|  ー|  1|
+        ||  0|  测|  1|
+        ||  0|  か|  1|
+        ||  0|  걸|  1|
+        ||  0|   à|  1|
+        ||  0|  焼|  1|
+        ||  0|羍む|  1|
+        ||  0| 뺭ᾘ|  1|
+        ||  0| १२३|  1|
+        |+---+----+---+
+        |""".stripMargin
+
+    checkShowString(df, expected)
+  }
+
   test(
     "SPARK-15112: EmbedDeserializerInFilter should not optimize plan fragment that changes schema"
   )

From 3649de50235cd19cfea2c3d88d1ccfb18ea8893a Mon Sep 17 00:00:00 2001
From: xuejianbest <384329882@qq.com>
Date: Thu, 30 Aug 2018 12:12:47 +0800
Subject: [PATCH 08/10] Modified variable name in DatasetSuite

---
 sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
index f58a41f25e53..a18f48f0bffb 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -1013,7 +1013,7 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
         |+---+----+---+
         |""".stripMargin
 
-    checkShowString(df, expected)
+    checkShowString(ds, expected)
   }
 
   test(

From 45ac272ca667f3330f7b550b463b23c284d9eadf Mon Sep 17 00:00:00 2001
From: xuejianbest <384329882@qq.com>
Date: Fri, 31 Aug 2018 10:52:33 +0800
Subject: [PATCH 09/10] Adding scalastyle:off nonascii in code

Fix style errors caused by the non-ASCII characters in these files by
disabling the nonascii check for the sections of code where it is
appropriate to use unicode characters.
---
 core/src/main/scala/org/apache/spark/util/Utils.scala      | 2 ++
 core/src/test/scala/org/apache/spark/util/UtilsSuite.scala | 2 ++
 .../src/test/scala/org/apache/spark/sql/DatasetSuite.scala | 6 ++++--
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index 89fd746b4243..d2d26a0b61bc 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -2799,6 +2799,7 @@ private[spark] object Utils extends Logging {
    * Regular expression matching full width characters
    */
   private val fullWidthRegex = ("""[""" +
+    // scalastyle:off nonascii
     """\u1100-\u115F""" +
     """\u2E80-\uA4CF""" +
     """\uAC00-\uD7A3""" +
@@ -2807,6 +2808,7 @@ private[spark] object Utils extends Logging {
     """\uFE30-\uFE6F""" +
     """\uFF00-\uFF60""" +
     """\uFFE0-\uFFE6""" +
+    // scalastyle:on nonascii
     """]""").r
 
   /**

diff --git a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
index ac907c0dc1b1..943b53522d64 100644
--- a/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/UtilsSuite.scala
@@ -1186,6 +1186,7 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging {
   }
 
   test("stringHalfWidth") {
+    // scalastyle:off nonascii
     assert(Utils.stringHalfWidth(null) == 0)
     assert(Utils.stringHalfWidth("") == 0)
     assert(Utils.stringHalfWidth("ab c") == 4)
@@ -1202,6 +1203,7 @@ class UtilsSuite extends SparkFunSuite with ResetSystemProperties with Logging {
     assert(Utils.stringHalfWidth("羍む") == 4)
     assert(Utils.stringHalfWidth("뺭ᾘ") == 3)
     assert(Utils.stringHalfWidth("\u0967\u0968\u0969") == 3)
+    // scalastyle:on nonascii
   }
 }

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
index a18f48f0bffb..ca8fbc991a3a 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DatasetSuite.scala
@@ -970,6 +970,7 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
   }
 
   test("SPARK-25108 Fix the show method to display the full width character alignment problem") {
+    // scalastyle:off nonascii
     val df = Seq(
       (0, null, 1),
       (0, "", 1),
@@ -988,9 +989,10 @@ class DatasetSuite extends QueryTest with SharedSQLContext {
       (0, "뺭ᾘ", 1),
       (0, "\u0967\u0968\u0969", 1)
     ).toDF("b", "a", "c")
-
+    // scalastyle:on nonascii
     val ds = df.as[ClassData]
     val expected =
+      // scalastyle:off nonascii
       """+---+----+---+
         ||  b|   a|  c|
         |+---+----+---+
         ||  0|null|  1|
         ||  0|    |  1|
         ||  0|ab c|  1|
         ||  0|1098|  1|
         ||  0|  mø|  1|
         ||  0| γύρ|  1|
         ||  0|  pê|  1|
         ||  0|  ー|  1|
         ||  0|  测|  1|
         ||  0|  か|  1|
         ||  0|  걸|  1|
         ||  0|   à|  1|
         ||  0|  焼|  1|
         ||  0|羍む|  1|
         ||  0| 뺭ᾘ|  1|
         ||  0| १२३|  1|
         |+---+----+---+
         |""".stripMargin
-
+    // scalastyle:on nonascii
     checkShowString(ds, expected)
   }

From 52acfd58a82f20656331b43a2b27944fdfac2b3f Mon Sep 17 00:00:00 2001
From: xuejianbest <384329882@qq.com>
Date: Tue, 4 Sep 2018 20:18:47 +0800
Subject: [PATCH 10/10] Fix the show method to display the full width
 character alignment problem

Some characters (called full-width characters) occupy the width of two
ordinary characters (called half-width characters) when displayed in a
terminal such as Xshell. When the Dataset.show() method is called on data
that mixes full-width and half-width characters, the columns therefore
fail to line up.

This commit adds a method that calculates the number of half widths in a
given string, so that the show method can compute the correct padding and
align its output properly.

Performance impact: tested a Dataset of 100 rows, each row with two
columns -- one holding the index (0-99) and the other a random string of
100 characters -- and called showString on it. The original show method
(without this patch) took about 42ms; with this patch it took about 46ms,
roughly 10% slower.
---
 core/src/main/scala/org/apache/spark/util/Utils.scala | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/core/src/main/scala/org/apache/spark/util/Utils.scala b/core/src/main/scala/org/apache/spark/util/Utils.scala
index d2d26a0b61bc..b5268f83309f 100644
--- a/core/src/main/scala/org/apache/spark/util/Utils.scala
+++ b/core/src/main/scala/org/apache/spark/util/Utils.scala
@@ -2796,7 +2796,10 @@ private[spark] object Utils extends Logging {
   }
 
   /**
-   * Regular expression matching full width characters
+   * Regular expression matching full width characters.
+   *
+   * Looked at all the 0x0000-0xFFFF characters (unicode) and showed them under Xshell.
+   * Found all the full width characters, then derived the regular expression.
    */
   private val fullWidthRegex = ("""[""" +
     // scalastyle:off nonascii
@@ -2814,6 +2817,9 @@ private[spark] object Utils extends Logging {
   /**
    * Return the number of half widths in a given string. Note that a full width character
    * occupies two half widths.
+   *
+   * For a string consisting of 1 million characters, the execution of this method requires
+   * about 50ms.
    */
   def stringHalfWidth(str: String): Int = {
     if (str == null) 0 else str.length + fullWidthRegex.findAllIn(str).size