-
Notifications
You must be signed in to change notification settings - Fork 0
22 enceladus schema utils #23
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
9d43738
b88af07
330c1fa
475949e
56bfd97
32021cf
25f1983
1a5b004
2110f30
f62b5c1
5f2746a
e58d3aa
ade77e2
2517a20
d9533d2
4248a6f
8bde069
c359974
965908d
3230c90
09f3d20
e77fe1d
11807ae
2e7c01f
fdcaabc
825ada4
8a141d8
1fa973c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,88 @@ | ||
| /* | ||
| * Copyright 2021 ABSA Group Limited | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| package za.co.absa.spark.commons.implicits | ||
|
|
||
| import org.apache.spark.sql.types.{ArrayType, DataType, StructType} | ||
| import za.co.absa.spark.commons.implicits.StructTypeImplicits.StructTypeEnhancements | ||
|
|
||
| import scala.annotation.tailrec | ||
|
|
||
object ArrayTypeImplicits {

  /**
   * Extension methods for Spark's `ArrayType`: equivalence check, structural diff and
   * lookup of the innermost element type of nested arrays.
   *
   * @param arrayType The array type the extension methods operate on
   */
  implicit class ArrayTypeEnhancements(arrayType: ArrayType) {

    /**
     * Checks whether this array type is equivalent to another array type by comparing
     * their element types.
     *
     * Nested arrays are compared recursively, struct elements are compared through
     * `isEquivalent` from `StructTypeEnhancements`, and any other element types are
     * compared with `==`. The `containsNull` flags of the two arrays themselves are
     * not compared.
     *
     * @param other The second array to compare
     * @return true if provided arrays are the same ignoring nullability
     */
    @tailrec
    final def isEquivalentArrayType(other: ArrayType): Boolean = {
      // NOTE(review): the PR discussion observed that this looks very similar to something
      // else (the referenced name is missing from the visible review text); any
      // consolidation was deferred to a later minor release.
      (arrayType.elementType, other.elementType) match {
        case (nested1: ArrayType, nested2: ArrayType) => nested1.isEquivalentArrayType(nested2)
        case (struct1: StructType, struct2: StructType) => struct1.isEquivalent(struct2)
        // An array or struct element on this side paired with a different kind on the other.
        case (_: ArrayType, _) | (_: StructType, _) => false
        case (element1, element2) => element1 == element2
      }
    }

    /**
     * Finds the differences between this ArrayType and another one.
     *
     * If the `typeName`s of the element types differ, a single message describing the
     * mismatch is returned. Otherwise nested arrays are compared recursively, struct
     * elements are compared through `diffSchema` from `StructTypeEnhancements`, and any
     * other element types yield no difference.
     *
     * @param array2 The second array to compare
     * @param parent Parent path. This is used for the accumulation of differences and their print out
     * @return A Seq of descriptions of the differences found, empty if none were found
     */
    @tailrec
    private[implicits] final def diffArray(array2: ArrayType, parent: String): Seq[String] = {
      (arrayType.elementType, array2.elementType) match {
        case (element1, element2) if element1.typeName != element2.typeName =>
          Seq(s"$parent data type doesn't match (${element1.typeName}) vs (${element2.typeName})")
        // Both sides are matched by type, so no cast of the second element type is needed.
        case (nested1: ArrayType, nested2: ArrayType) => nested1.diffArray(nested2, parent)
        case (struct1: StructType, struct2: StructType) => struct1.diffSchema(struct2, parent)
        case _ => Seq.empty[String]
      }
    }

    /**
     * For an array of arrays of arrays, ... get the final element type at the bottom of the array
     *
     * @return A non-array data type at the bottom of array nesting
     */
    final def getDeepestArrayType(): DataType = {
      // Follows `elementType` until it is no longer an ArrayType.
      @tailrec
      def descend(current: ArrayType): DataType = current.elementType match {
        case nested: ArrayType => descend(nested)
        case leaf => leaf
      }

      descend(arrayType)
    }
  }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is probably the source where it made sense.