From cc764d47d25eb35ae6c38c1caf1a87d65bdd10fc Mon Sep 17 00:00:00 2001
From: Travis Hegner
Date: Thu, 5 Nov 2015 09:53:43 -0500
Subject: [PATCH 1/4] initial attempt

---
 .../apache/spark/sql/jdbc/JdbcDialects.scala  | 22 +++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
index 88ae83957a708..42b6475037dbc 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
@@ -315,3 +315,25 @@ case object DerbyDialect extends JdbcDialect {
 }
 
 
+/**
+ * :: DeveloperApi ::
+ * Default Oracle dialect, mapping a nonspecific
+ * numeric type to a general decimal type.
+ * Solution by @bdolbeare (github.com)
+ */
+@DeveloperApi
+case object OracleDialect extends JdbcDialect {
+  override def getCatalystType(
+      sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = {
+    // Handle NUMBER fields that have no precision/scale in a special way
+    // because JDBC ResultSetMetaData converts this to 0 precision and -127 scale
+    if (sqlType == Types.NUMERIC && size == 0) {
+      // This is sub-optimal as we have to pick a precision/scale in advance whereas the data in Oracle is allowed
+      // to have different precision/scale for each value. This conversion works in our domain for now though we
+      // need a more durable solution. Look into changing JDBCRDD (line 406):
+      // FROM: mutableRow.update(i, Decimal(decimalVal, p, s))
+      // TO: mutableRow.update(i, Decimal(decimalVal))
+      Some(DecimalType(DecimalType.MAX_PRECISION, 10))
+    } else None
+  }
+}

From 3157ed5a71047ed85e8d80d4ef92c34273c38f1e Mon Sep 17 00:00:00 2001
From: Travis Hegner
Date: Thu, 5 Nov 2015 10:04:09 -0500
Subject: [PATCH 2/4] adding canHandle override, and registration of
 OracleDialect

---
 .../src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
index 42b6475037dbc..f28cd0ba55c33 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
@@ -139,6 +139,7 @@ object JdbcDialects {
   registerDialect(DB2Dialect)
   registerDialect(MsSqlServerDialect)
   registerDialect(DerbyDialect)
+  registerDialect(OracleDialect)
 
 
   /**
@@ -323,6 +324,7 @@ case object DerbyDialect extends JdbcDialect {
  */
 @DeveloperApi
 case object OracleDialect extends JdbcDialect {
+  override def canHandle(url: String): Boolean = url.startsWith("jdbc:oracle")
   override def getCatalystType(
       sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = {
     // Handle NUMBER fields that have no precision/scale in a special way
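
With the registration and canHandle override from PATCH 2/4 in place, an ordinary JDBC read picks the dialect up automatically: JdbcDialects matches the connection URL against each registered dialect's canHandle and dispatches to the match, consulting its getCatalystType for every column in the result schema. Below is a minimal sketch of that flow, in the Spark 1.5-era API; the SQLContext, URL, and table name are hypothetical stand-ins, not part of the patch.

import java.util.Properties
import org.apache.spark.sql.SQLContext

// Hypothetical handles: a live SQLContext and an Oracle instance to read from.
val sqlContext: SQLContext = ??? // e.g. obtained from the running SparkContext

// canHandle matches the "jdbc:oracle" prefix, so OracleDialect is selected and
// unconstrained NUMBER columns arrive as DecimalType(38, 10).
val df = sqlContext.read.jdbc(
  "jdbc:oracle:thin:@//dbhost:1521/orcl", // hypothetical URL
  "SOME_SCHEMA.SOME_TABLE",               // hypothetical table
  new Properties())

df.printSchema() // NUMBER columns without precision/scale show as decimal(38,10)
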
From 839bcb582e138144b2d0757c9471de3a7cacc2ac Mon Sep 17 00:00:00 2001
From: Travis Hegner
Date: Thu, 5 Nov 2015 10:06:19 -0500
Subject: [PATCH 3/4] more attribution

---
 .../src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
index f28cd0ba55c33..0ed9510938835 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
@@ -320,7 +320,7 @@ case object DerbyDialect extends JdbcDialect {
  * :: DeveloperApi ::
  * Default Oracle dialect, mapping a nonspecific
  * numeric type to a general decimal type.
- * Solution by @bdolbeare (github.com)
+ * Solution by @cloud-fan and @bdolbeare (github.com)
  */
 @DeveloperApi
 case object OracleDialect extends JdbcDialect {

From a1370a7a93e1bc4e2f908be980165223d0f67d58 Mon Sep 17 00:00:00 2001
From: Travis Hegner
Date: Thu, 5 Nov 2015 11:10:35 -0500
Subject: [PATCH 4/4] style cleanup

---
 .../scala/org/apache/spark/sql/jdbc/JdbcDialects.scala      | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
index 0ed9510938835..ec1ccb754f30c 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/jdbc/JdbcDialects.scala
@@ -318,7 +318,7 @@ case object DerbyDialect extends JdbcDialect {
 
 /**
  * :: DeveloperApi ::
- * Default Oracle dialect, mapping a nonspecific 
+ * Default Oracle dialect, mapping a nonspecific
  * numeric type to a general decimal type.
  * Solution by @cloud-fan and @bdolbeare (github.com)
  */
@@ -330,9 +330,10 @@ case object OracleDialect extends JdbcDialect {
     // Handle NUMBER fields that have no precision/scale in a special way
     // because JDBC ResultSetMetaData converts this to 0 precision and -127 scale
     if (sqlType == Types.NUMERIC && size == 0) {
-      // This is sub-optimal as we have to pick a precision/scale in advance whereas the data in Oracle is allowed
-      // to have different precision/scale for each value. This conversion works in our domain for now though we
-      // need a more durable solution. Look into changing JDBCRDD (line 406):
+      // This is sub-optimal as we have to pick a precision/scale in advance whereas the data
+      // in Oracle is allowed to have different precision/scale for each value.
+      // This conversion works in our domain for now though we need a more durable solution.
+      // Look into changing JDBCRDD (line 406):
       // FROM: mutableRow.update(i, Decimal(decimalVal, p, s))
       // TO: mutableRow.update(i, Decimal(decimalVal))
       Some(DecimalType(DecimalType.MAX_PRECISION, 10))
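
The comment block reflowed in PATCH 4/4 describes a real trade-off: with a fixed DecimalType(38, 10), every unconstrained NUMBER value is rescaled to exactly 10 fractional digits when JDBCRDD materializes it via Decimal(decimalVal, p, s), whereas the suggested Decimal(decimalVal) form would keep each value's own precision and scale. A small sketch of the difference using Spark's Decimal directly; the literal value is illustrative only.

import org.apache.spark.sql.types.Decimal

// Fixed precision/scale, as the dialect's DecimalType(38, 10) mapping implies:
// the value is rescaled to 10 fractional digits on the way in.
val fixed = Decimal(BigDecimal("3.14159265358979"), 38, 10)

// No precision/scale supplied (the alternative the JDBCRDD note points at):
// the value keeps whatever precision and scale it arrived with.
val asIs = Decimal(BigDecimal("3.14159265358979"))

Note also that a scale of 10 inside precision 38 leaves at most 28 integral digits, so any column-level choice made in the dialect ahead of time can only ever be a compromise for Oracle's per-value NUMBER semantics.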