From 9d437380d7304d7c20bcccd0ed95ad14341c0a7a Mon Sep 17 00:00:00 2001 From: Adrian Olosutean Date: Tue, 11 Jan 2022 11:18:15 +0100 Subject: [PATCH 1/6] #19 ColumnImplicits and StructFieldImplicits --- README.md | 53 +++++++++++++++ .../commons/implicits/ColumnImplicits.scala | 65 +++++++++++++++++++ .../implicits/StructFieldImplicits.scala | 48 ++++++++++++++ 3 files changed, 166 insertions(+) create mode 100644 src/main/scala/za/co/absa/spark/commons/implicits/ColumnImplicits.scala create mode 100644 src/main/scala/za/co/absa/spark/commons/implicits/StructFieldImplicits.scala diff --git a/README.md b/README.md index a2267190..ed2e88df 100644 --- a/README.md +++ b/README.md @@ -50,4 +50,57 @@ select to order and positionally filter columns of a DataFrame ```scala SchemaUtils.alignSchema(dataFrameToBeAligned, modelSchema) + ``` + +### ColumnImplicits + +_Column_ provides implicit methods for transforming Spark Columns + +1. Transforms the column into a booleaan column, checking if values are negative or positive infinity + + ```scala + column.isInfinite + ``` +2. Returns column with requested substring. It shifts the substring indexation to be in accordance with Scala/ Java. + The provided starting position where to start the substring from, if negative it will be counted from end + + ```scala + column.zeroBasedSubstr(startPos) + ``` +3. Returns column with requested substring. It shifts the substring indexation to be in accordance with Scala/ Java. + If the provided starting position where to start the substring from is negative, it will be counted from end. + The length of the desired substring, if longer than the rest of the string, all the remaining characters are taken. + + + ```scala + column.zeroBasedSubstr(startPos, length) + ``` + +### StructFieldImplicits + +_StructFieldImplicits_ provides implicit methods for working with StructField objects. + +1. Gets the metadata String value given a key + + ```scala + structField.getMetadataString(key) + ``` + +2. 
Gets the metadata Char value given a key if the value is a single character String, it returns the char, + otherwise None + + ```scala + structField.getMetadataChar(key) + ``` + +3. Gets the metadata boolean value of a given key, given that it can be transformed into boolean + + ```scala + structField.getMetadataStringAsBoolean(key) + ``` + +4. Checks the structfield if it has the provided key, returns a boolean + + ```scala + structField.hasMetadataKey(key) ``` \ No newline at end of file diff --git a/src/main/scala/za/co/absa/spark/commons/implicits/ColumnImplicits.scala b/src/main/scala/za/co/absa/spark/commons/implicits/ColumnImplicits.scala new file mode 100644 index 00000000..00fce653 --- /dev/null +++ b/src/main/scala/za/co/absa/spark/commons/implicits/ColumnImplicits.scala @@ -0,0 +1,65 @@ +/* + * Copyright 2021 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package za.co.absa.spark.commons.implicits + +import org.apache.spark.sql.Column +import org.apache.spark.sql.functions._ + +object ColumnImplicits { + implicit class ColumnEnhancements(column: Column) { + def isInfinite: Column = { + column.isin(Double.PositiveInfinity, Double.NegativeInfinity) + } + + /** + * Spark strings are based on 1 unlike scala. The function shifts the substring indexation to be in accordance with + * Scala/ Java. 
+ * Another enhancement is, that the function allows a negative index, denoting counting of the index from back + * This version takes the substring from the startPos until the end. + * + * @param startPos the index (zero based) where to start the substring from, if negative it's counted from end + * @return column with requested substring + */ + def zeroBasedSubstr(startPos: Int): Column = { + if (startPos >= 0) { + zeroBasedSubstr(startPos, Int.MaxValue - startPos) + } else { + zeroBasedSubstr(startPos, -startPos) + } + } + + /** + * Spark strings are based on 1 unlike scala. The function shifts the substring indexation to be in accordance with + * Scala/ Java. + * Another enhancement is, that the function allows a negative index, denoting counting of the index from back + * This version takes the substring from the startPos and takes up to the given number of characters (fewer if the string is shorter). + * + * @param startPos the index (zero based) where to start the substring from, if negative it's counted from end + * @param len length of the desired substring, if longer than the rest of the string, all the remaining characters are taken + * @return column with requested substring + */ + def zeroBasedSubstr(startPos: Int, len: Int): Column = { + if (startPos >= 0) { + column.substr(startPos + 1, len) + } else { + val startPosColumn = greatest(length(column) + startPos + 1, lit(1)) + val lenColumn = lit(len) + when(length(column) + startPos <= 0, length(column) + startPos).otherwise(0) + column.substr(startPosColumn, lenColumn) + } + } + } +} diff --git a/src/main/scala/za/co/absa/spark/commons/implicits/StructFieldImplicits.scala b/src/main/scala/za/co/absa/spark/commons/implicits/StructFieldImplicits.scala new file mode 100644 index 00000000..1610bde6 --- /dev/null +++ b/src/main/scala/za/co/absa/spark/commons/implicits/StructFieldImplicits.scala @@ -0,0 +1,48 @@ +/* + * Copyright 2021 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you 
may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package za.co.absa.spark.commons.implicits + +import org.apache.spark.sql.types._ +import scala.util.Try + +object StructFieldImplicits { + implicit class StructFieldEnhancements(val structField: StructField) { + def getMetadataString(key: String): Option[String] = { + Try(structField.metadata.getString(key)).toOption + } + + def getMetadataChar(key: String): Option[Char] = { + val resultString = Try(structField.metadata.getString(key)).toOption + resultString.flatMap { s => + if (s.length == 1) { + Option(s(0)) + } else { + None + } + } + } + + def getMetadataStringAsBoolean(key: String): Option[Boolean] = { + Try(structField.metadata.getString(key).toBoolean).toOption + } + + + def hasMetadataKey(key: String): Boolean = { + structField.metadata.contains(key) + } + } +} From 330c1fa576ba2172586da94c115a1e5c9cb76c40 Mon Sep 17 00:00:00 2001 From: Adrian Olosutean Date: Thu, 13 Jan 2022 15:21:27 +0100 Subject: [PATCH 2/6] #19 tests and small code fix --- .../implicits/StructFieldImplicits.scala | 3 +- .../implicits/ColumnImplicitsTest.scala | 28 ++++++++++++ .../implicits/StructFieldImplicitsTest.scala | 45 +++++++++++++++++++ 3 files changed, 74 insertions(+), 2 deletions(-) create mode 100644 src/test/scala/za/co/absa/spark/commons/implicits/ColumnImplicitsTest.scala create mode 100644 src/test/scala/za/co/absa/spark/commons/implicits/StructFieldImplicitsTest.scala diff --git a/src/main/scala/za/co/absa/spark/commons/implicits/StructFieldImplicits.scala 
b/src/main/scala/za/co/absa/spark/commons/implicits/StructFieldImplicits.scala index 1610bde6..62063941 100644 --- a/src/main/scala/za/co/absa/spark/commons/implicits/StructFieldImplicits.scala +++ b/src/main/scala/za/co/absa/spark/commons/implicits/StructFieldImplicits.scala @@ -28,7 +28,7 @@ object StructFieldImplicits { def getMetadataChar(key: String): Option[Char] = { val resultString = Try(structField.metadata.getString(key)).toOption resultString.flatMap { s => - if (s.length == 1) { + if (s != null && s.length == 1) { Option(s(0)) } else { None @@ -40,7 +40,6 @@ object StructFieldImplicits { Try(structField.metadata.getString(key).toBoolean).toOption } - def hasMetadataKey(key: String): Boolean = { structField.metadata.contains(key) } diff --git a/src/test/scala/za/co/absa/spark/commons/implicits/ColumnImplicitsTest.scala b/src/test/scala/za/co/absa/spark/commons/implicits/ColumnImplicitsTest.scala new file mode 100644 index 00000000..35b0f537 --- /dev/null +++ b/src/test/scala/za/co/absa/spark/commons/implicits/ColumnImplicitsTest.scala @@ -0,0 +1,28 @@ +package za.co.absa.spark.commons.implicits + +import org.apache.spark.sql.Column +import org.apache.spark.sql.functions.lit +import org.scalatest.funsuite.AnyFunSuite +import za.co.absa.spark.commons.implicits.ColumnImplicits.ColumnEnhancements + +class ColumnImplicitsTest extends AnyFunSuite{ + + private val column: Column = lit("abcdefgh") + + test("zeroBasedSubstr with tartPos") { + assertResult("cdefgh")(column.zeroBasedSubstr(2).expr.eval().toString) + assertResult("gh")(column.zeroBasedSubstr(-2).expr.eval().toString) + assertResult("")(column.zeroBasedSubstr(Int.MaxValue).expr.eval().toString) + assertResult("abcdefgh")(column.zeroBasedSubstr(Int.MinValue).expr.eval().toString) + } + + test("zeroBasedSubstr with tartPos and len") { + assertResult("cde")(column.zeroBasedSubstr(2, 3).expr.eval().toString) + assertResult("gh")(column.zeroBasedSubstr(-2, 7).expr.eval().toString) + 
assertResult("")(column.zeroBasedSubstr(Int.MaxValue, 1).expr.eval().toString) + assertResult("")(column.zeroBasedSubstr(Int.MaxValue, -3).expr.eval().toString) + assertResult("")(column.zeroBasedSubstr(Int.MinValue,2).expr.eval().toString) + assertResult("")(column.zeroBasedSubstr(Int.MinValue,-3).expr.eval().toString) + } + +} diff --git a/src/test/scala/za/co/absa/spark/commons/implicits/StructFieldImplicitsTest.scala b/src/test/scala/za/co/absa/spark/commons/implicits/StructFieldImplicitsTest.scala new file mode 100644 index 00000000..03e05b37 --- /dev/null +++ b/src/test/scala/za/co/absa/spark/commons/implicits/StructFieldImplicitsTest.scala @@ -0,0 +1,45 @@ +package za.co.absa.spark.commons.implicits + +import org.apache.spark.sql.types.{Metadata, StringType, StructField} +import org.scalatest.funsuite.AnyFunSuite +import za.co.absa.spark.commons.implicits.StructFieldImplicits.StructFieldEnhancements + +class StructFieldImplicitsTest extends AnyFunSuite { + + def fieldWith(value123: String) = { + val value1 = s"""{ \"a\" : ${value123} }""" + StructField("uu", StringType, true, Metadata.fromJson(value1)) + } + + test("getMetadataString") { + assertResult(Some(""))(fieldWith("\"\"").getMetadataString("a")) + assertResult(None)(fieldWith("123").getMetadataString("a")) + assertResult(Some("ffbfg"))(fieldWith("\"ffbfg\"").getMetadataString("a")) + assertResult(Some(null))(fieldWith("null").getMetadataString("a")) + } + + test("getMetadataChar") { + assertResult(None)(fieldWith("\"\"").getMetadataChar("a")) + assertResult(None)(fieldWith("123").getMetadataChar("a")) + assertResult(Some('g'))(fieldWith("\"g\"").getMetadataChar("a")) + assertResult(None)(fieldWith("null").getMetadataChar("a")) + } + + test("getMetadataStringAsBoolean") { + assertResult(None)(fieldWith("\"\"").getMetadataStringAsBoolean("a")) + assertResult(None)(fieldWith("123").getMetadataStringAsBoolean("a")) + assertResult(Some(true))(fieldWith("\"true\"").getMetadataStringAsBoolean("a")) + 
assertResult(Some(false))(fieldWith("\"false\"").getMetadataStringAsBoolean("a")) + assertResult(None)(fieldWith("false").getMetadataStringAsBoolean("a")) + assertResult(None)(fieldWith("true").getMetadataStringAsBoolean("a")) + assertResult(None)(fieldWith("null").getMetadataStringAsBoolean("a")) + } + + test("hastMetadataKKey") { + assertResult(true)(fieldWith("\"\"").hasMetadataKey("a")) + assertResult(false)(fieldWith("123").hasMetadataKey("b")) + assertResult(true)(fieldWith("\"hvh\"").hasMetadataKey("a")) + assertResult(true)(fieldWith("null").hasMetadataKey("a")) + } + +} From 32021cfd8ccc5dc88511773947b2cca536c83af3 Mon Sep 17 00:00:00 2001 From: Adrian Olosutean Date: Thu, 13 Jan 2022 15:31:15 +0100 Subject: [PATCH 3/6] #22 headers --- .../commons/implicits/ColumnImplicitsTest.scala | 16 ++++++++++++++++ .../implicits/StructFieldImplicitsTest.scala | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/src/test/scala/za/co/absa/spark/commons/implicits/ColumnImplicitsTest.scala b/src/test/scala/za/co/absa/spark/commons/implicits/ColumnImplicitsTest.scala index 35b0f537..6e9a85b9 100644 --- a/src/test/scala/za/co/absa/spark/commons/implicits/ColumnImplicitsTest.scala +++ b/src/test/scala/za/co/absa/spark/commons/implicits/ColumnImplicitsTest.scala @@ -1,3 +1,19 @@ +/* + * Copyright 2021 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package za.co.absa.spark.commons.implicits import org.apache.spark.sql.Column diff --git a/src/test/scala/za/co/absa/spark/commons/implicits/StructFieldImplicitsTest.scala b/src/test/scala/za/co/absa/spark/commons/implicits/StructFieldImplicitsTest.scala index 03e05b37..53ba1224 100644 --- a/src/test/scala/za/co/absa/spark/commons/implicits/StructFieldImplicitsTest.scala +++ b/src/test/scala/za/co/absa/spark/commons/implicits/StructFieldImplicitsTest.scala @@ -1,3 +1,19 @@ +/* + * Copyright 2021 ABSA Group Limited + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package za.co.absa.spark.commons.implicits import org.apache.spark.sql.types.{Metadata, StringType, StructField} From 1a5b0043dd77ff6aaa3fd241ab45238617957b20 Mon Sep 17 00:00:00 2001 From: Adrian Olosutean Date: Fri, 14 Jan 2022 10:51:05 +0100 Subject: [PATCH 4/6] #19 feedback --- .../absa/spark/commons/implicits/ColumnImplicitsTest.scala | 5 +++-- .../spark/commons/implicits/StructFieldImplicitsTest.scala | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/test/scala/za/co/absa/spark/commons/implicits/ColumnImplicitsTest.scala b/src/test/scala/za/co/absa/spark/commons/implicits/ColumnImplicitsTest.scala index 6e9a85b9..20d1e62b 100644 --- a/src/test/scala/za/co/absa/spark/commons/implicits/ColumnImplicitsTest.scala +++ b/src/test/scala/za/co/absa/spark/commons/implicits/ColumnImplicitsTest.scala @@ -25,18 +25,19 @@ class ColumnImplicitsTest extends AnyFunSuite{ private val column: Column = lit("abcdefgh") - test("zeroBasedSubstr with tartPos") { + test("zeroBasedSubstr with startPos") { assertResult("cdefgh")(column.zeroBasedSubstr(2).expr.eval().toString) assertResult("gh")(column.zeroBasedSubstr(-2).expr.eval().toString) assertResult("")(column.zeroBasedSubstr(Int.MaxValue).expr.eval().toString) assertResult("abcdefgh")(column.zeroBasedSubstr(Int.MinValue).expr.eval().toString) } - test("zeroBasedSubstr with tartPos and len") { + test("zeroBasedSubstr with startPos and len") { assertResult("cde")(column.zeroBasedSubstr(2, 3).expr.eval().toString) assertResult("gh")(column.zeroBasedSubstr(-2, 7).expr.eval().toString) assertResult("")(column.zeroBasedSubstr(Int.MaxValue, 1).expr.eval().toString) assertResult("")(column.zeroBasedSubstr(Int.MaxValue, -3).expr.eval().toString) + assertResult("")(column.zeroBasedSubstr(4, -3).expr.eval().toString) assertResult("")(column.zeroBasedSubstr(Int.MinValue,2).expr.eval().toString) assertResult("")(column.zeroBasedSubstr(Int.MinValue,-3).expr.eval().toString) } diff --git 
a/src/test/scala/za/co/absa/spark/commons/implicits/StructFieldImplicitsTest.scala b/src/test/scala/za/co/absa/spark/commons/implicits/StructFieldImplicitsTest.scala index 53ba1224..5a325bca 100644 --- a/src/test/scala/za/co/absa/spark/commons/implicits/StructFieldImplicitsTest.scala +++ b/src/test/scala/za/co/absa/spark/commons/implicits/StructFieldImplicitsTest.scala @@ -38,6 +38,7 @@ class StructFieldImplicitsTest extends AnyFunSuite { assertResult(None)(fieldWith("\"\"").getMetadataChar("a")) assertResult(None)(fieldWith("123").getMetadataChar("a")) assertResult(Some('g'))(fieldWith("\"g\"").getMetadataChar("a")) + assertResult(None)(fieldWith("\"abc\"").getMetadataChar("a")) assertResult(None)(fieldWith("null").getMetadataChar("a")) } @@ -51,7 +52,7 @@ class StructFieldImplicitsTest extends AnyFunSuite { assertResult(None)(fieldWith("null").getMetadataStringAsBoolean("a")) } - test("hastMetadataKKey") { + test("hastMetadataKey") { assertResult(true)(fieldWith("\"\"").hasMetadataKey("a")) assertResult(false)(fieldWith("123").hasMetadataKey("b")) assertResult(true)(fieldWith("\"hvh\"").hasMetadataKey("a")) From 2110f30b5f5392f234eb57923b40c1e7e167be94 Mon Sep 17 00:00:00 2001 From: Adrian Olosutean Date: Wed, 19 Jan 2022 14:52:37 +0100 Subject: [PATCH 5/6] #19 renames --- README.md | 9 ++-- .../implicits/StructFieldImplicits.scala | 18 ++++---- .../implicits/StructFieldImplicitsTest.scala | 46 +++++++++---------- 3 files changed, 37 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 772f4ad8..f68db30b 100644 --- a/README.md +++ b/README.md @@ -79,30 +79,31 @@ _Column_ provides implicit methods for transforming Spark Columns ### StructFieldImplicits _StructFieldImplicits_ provides implicit methods for working with StructField objects. +Of them, metadata methods are: 1. Gets the metadata String value given a key ```scala - structField.getMetadataString(key) + structField.metadata.getOptString(key) ``` 2. 
Gets the metadata Char value given a key if the value is a single character String, it returns the char, otherwise None ```scala - structField.getMetadataChar(key) + structField.metadata.getOptChar(key) ``` 3. Gets the metadata boolean value of a given key, given that it can be transformed into boolean ```scala - structField.getMetadataStringAsBoolean(key) + structField.metadata.getOptStringAsBoolean(key) ``` 4. Checks the structfield if it has the provided key, returns a boolean ```scala - structField.hasMetadataKey(key) + structField.metadata.hasKey(key) ``` # Spark Version Guard diff --git a/src/main/scala/za/co/absa/spark/commons/implicits/StructFieldImplicits.scala b/src/main/scala/za/co/absa/spark/commons/implicits/StructFieldImplicits.scala index 62063941..fdcb601e 100644 --- a/src/main/scala/za/co/absa/spark/commons/implicits/StructFieldImplicits.scala +++ b/src/main/scala/za/co/absa/spark/commons/implicits/StructFieldImplicits.scala @@ -20,13 +20,13 @@ import org.apache.spark.sql.types._ import scala.util.Try object StructFieldImplicits { - implicit class StructFieldEnhancements(val structField: StructField) { - def getMetadataString(key: String): Option[String] = { - Try(structField.metadata.getString(key)).toOption + implicit class StructFieldMetadataEnhancement(val metadata: Metadata) { + def getOptString(key: String): Option[String] = { + Try(metadata.getString(key)).toOption } - def getMetadataChar(key: String): Option[Char] = { - val resultString = Try(structField.metadata.getString(key)).toOption + def getOptChar(key: String): Option[Char] = { + val resultString = Try(metadata.getString(key)).toOption resultString.flatMap { s => if (s != null && s.length == 1) { Option(s(0)) @@ -36,12 +36,12 @@ object StructFieldImplicits { } } - def getMetadataStringAsBoolean(key: String): Option[Boolean] = { - Try(structField.metadata.getString(key).toBoolean).toOption + def getOptStringAsBoolean(key: String): Option[Boolean] = { + 
Try(metadata.getString(key).toBoolean).toOption } - def hasMetadataKey(key: String): Boolean = { - structField.metadata.contains(key) + def hasKey(key: String): Boolean = { + metadata.contains(key) } } } diff --git a/src/test/scala/za/co/absa/spark/commons/implicits/StructFieldImplicitsTest.scala b/src/test/scala/za/co/absa/spark/commons/implicits/StructFieldImplicitsTest.scala index 5a325bca..2fa0b9ed 100644 --- a/src/test/scala/za/co/absa/spark/commons/implicits/StructFieldImplicitsTest.scala +++ b/src/test/scala/za/co/absa/spark/commons/implicits/StructFieldImplicitsTest.scala @@ -18,7 +18,7 @@ package za.co.absa.spark.commons.implicits import org.apache.spark.sql.types.{Metadata, StringType, StructField} import org.scalatest.funsuite.AnyFunSuite -import za.co.absa.spark.commons.implicits.StructFieldImplicits.StructFieldEnhancements +import za.co.absa.spark.commons.implicits.StructFieldImplicits.StructFieldMetadataEnhancement class StructFieldImplicitsTest extends AnyFunSuite { @@ -28,35 +28,35 @@ class StructFieldImplicitsTest extends AnyFunSuite { } test("getMetadataString") { - assertResult(Some(""))(fieldWith("\"\"").getMetadataString("a")) - assertResult(None)(fieldWith("123").getMetadataString("a")) - assertResult(Some("ffbfg"))(fieldWith("\"ffbfg\"").getMetadataString("a")) - assertResult(Some(null))(fieldWith("null").getMetadataString("a")) + assertResult(Some(""))(fieldWith("\"\"").metadata.getOptString("a")) + assertResult(None)(fieldWith("123").metadata.getOptString("a")) + assertResult(Some("ffbfg"))(fieldWith("\"ffbfg\"").metadata.getOptString("a")) + assertResult(Some(null))(fieldWith("null").metadata.getOptString("a")) } - test("getMetadataChar") { - assertResult(None)(fieldWith("\"\"").getMetadataChar("a")) - assertResult(None)(fieldWith("123").getMetadataChar("a")) - assertResult(Some('g'))(fieldWith("\"g\"").getMetadataChar("a")) - assertResult(None)(fieldWith("\"abc\"").getMetadataChar("a")) - 
assertResult(None)(fieldWith("null").getMetadataChar("a")) + test("getOptChar") { + assertResult(None)(fieldWith("\"\"").metadata.getOptChar("a")) + assertResult(None)(fieldWith("123").metadata.getOptChar("a")) + assertResult(Some('g'))(fieldWith("\"g\"").metadata.getOptChar("a")) + assertResult(None)(fieldWith("\"abc\"").metadata.getOptChar("a")) + assertResult(None)(fieldWith("null").metadata.getOptChar("a")) } - test("getMetadataStringAsBoolean") { - assertResult(None)(fieldWith("\"\"").getMetadataStringAsBoolean("a")) - assertResult(None)(fieldWith("123").getMetadataStringAsBoolean("a")) - assertResult(Some(true))(fieldWith("\"true\"").getMetadataStringAsBoolean("a")) - assertResult(Some(false))(fieldWith("\"false\"").getMetadataStringAsBoolean("a")) - assertResult(None)(fieldWith("false").getMetadataStringAsBoolean("a")) - assertResult(None)(fieldWith("true").getMetadataStringAsBoolean("a")) - assertResult(None)(fieldWith("null").getMetadataStringAsBoolean("a")) + test("getStringAsBoolean") { + assertResult(None)(fieldWith("\"\"").metadata.getOptStringAsBoolean("a")) + assertResult(None)(fieldWith("123").metadata.getOptStringAsBoolean("a")) + assertResult(Some(true))(fieldWith("\"true\"").metadata.getOptStringAsBoolean("a")) + assertResult(Some(false))(fieldWith("\"false\"").metadata.getOptStringAsBoolean("a")) + assertResult(None)(fieldWith("false").metadata.getOptStringAsBoolean("a")) + assertResult(None)(fieldWith("true").metadata.getOptStringAsBoolean("a")) + assertResult(None)(fieldWith("null").metadata.getOptStringAsBoolean("a")) } test("hastMetadataKey") { - assertResult(true)(fieldWith("\"\"").hasMetadataKey("a")) - assertResult(false)(fieldWith("123").hasMetadataKey("b")) - assertResult(true)(fieldWith("\"hvh\"").hasMetadataKey("a")) - assertResult(true)(fieldWith("null").hasMetadataKey("a")) + assertResult(true)(fieldWith("\"\"").metadata.hasKey("a")) + assertResult(false)(fieldWith("123").metadata.hasKey("b")) + 
assertResult(true)(fieldWith("\"hvh\"").metadata.hasKey("a")) + assertResult(true)(fieldWith("null").metadata.hasKey("a")) } } From e58d3aaf26f573806c945a7c556cc44934f94cfa Mon Sep 17 00:00:00 2001 From: Adrian Olosutean Date: Thu, 20 Jan 2022 14:22:25 +0100 Subject: [PATCH 6/6] #19 other feedback --- README.md | 4 ++-- .../commons/implicits/StructFieldImplicitsTest.scala | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index f68db30b..f5fbe5c2 100644 --- a/README.md +++ b/README.md @@ -54,7 +54,7 @@ select to order and positionally filter columns of a DataFrame ### ColumnImplicits -_Column_ provides implicit methods for transforming Spark Columns +_ColumnImplicits_ provide implicit methods for transforming Spark Columns 1. Transforms the column into a booleaan column, checking if values are negative or positive infinity @@ -81,7 +81,7 @@ _Column_ provides implicit methods for transforming Spark Columns _StructFieldImplicits_ provides implicit methods for working with StructField objects. Of them, metadata methods are: -1. Gets the metadata String value given a key +1. 
Gets the metadata Option[String] value given a key ```scala structField.metadata.getOptString(key) diff --git a/src/test/scala/za/co/absa/spark/commons/implicits/StructFieldImplicitsTest.scala b/src/test/scala/za/co/absa/spark/commons/implicits/StructFieldImplicitsTest.scala index 2fa0b9ed..055afa9c 100644 --- a/src/test/scala/za/co/absa/spark/commons/implicits/StructFieldImplicitsTest.scala +++ b/src/test/scala/za/co/absa/spark/commons/implicits/StructFieldImplicitsTest.scala @@ -23,11 +23,11 @@ import za.co.absa.spark.commons.implicits.StructFieldImplicits.StructFieldMetada class StructFieldImplicitsTest extends AnyFunSuite { def fieldWith(value123: String) = { - val value1 = s"""{ \"a\" : ${value123} }""" + val value1 = s"""{ "a" : ${value123} }""" StructField("uu", StringType, true, Metadata.fromJson(value1)) } - test("getMetadataString") { + test("getOptString") { assertResult(Some(""))(fieldWith("\"\"").metadata.getOptString("a")) assertResult(None)(fieldWith("123").metadata.getOptString("a")) assertResult(Some("ffbfg"))(fieldWith("\"ffbfg\"").metadata.getOptString("a")) @@ -42,7 +42,7 @@ class StructFieldImplicitsTest extends AnyFunSuite { assertResult(None)(fieldWith("null").metadata.getOptChar("a")) } - test("getStringAsBoolean") { + test("getOptStringAsBoolean") { assertResult(None)(fieldWith("\"\"").metadata.getOptStringAsBoolean("a")) assertResult(None)(fieldWith("123").metadata.getOptStringAsBoolean("a")) assertResult(Some(true))(fieldWith("\"true\"").metadata.getOptStringAsBoolean("a")) @@ -52,7 +52,7 @@ class StructFieldImplicitsTest extends AnyFunSuite { assertResult(None)(fieldWith("null").metadata.getOptStringAsBoolean("a")) } - test("hastMetadataKey") { + test("hasKey") { assertResult(true)(fieldWith("\"\"").metadata.hasKey("a")) assertResult(false)(fieldWith("123").metadata.hasKey("b")) assertResult(true)(fieldWith("\"hvh\"").metadata.hasKey("a"))