-
Notifications
You must be signed in to change notification settings - Fork 28.9k
[SPARK-10101] [SQL] Add maxlength to JDBC field metadata and override JDBCDialects for strings as VARCHAR. #8374
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
c7abad3
f1d0b9e
faff507
c4b4477
35e61f3
dd22b2f
cd809c5
0cfeefa
dddc137
5f532e8
03f4d96
27f118b
44e1978
e605a11
4cae11b
4c2a7a4
a0cb024
d50bdf7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -81,6 +81,14 @@ abstract class JdbcDialect { | |
| */ | ||
// Base behaviour: no dialect-specific override for any data type.
def getJDBCType(dt: DataType): Option[JdbcType] = Option.empty[JdbcType]
|
|
||
/**
 * Retrieve the jdbc / sql type for a given datatype, with the column metadata available.
 *
 * The default implementation ignores `md` and delegates to the single-argument
 * `getJDBCType(dt)`. Dialects that only override the single-argument method therefore
 * keep supplying their type overrides when callers use this metadata-aware variant.
 * Dialects that need the metadata override this method instead.
 *
 * @param dt The datatype (e.g. [[org.apache.spark.sql.types.StringType]])
 * @param md The metadata
 * @return The new JdbcType if there is an override for this DataType
 */
def getJDBCType(dt: DataType, md: Metadata): Option[JdbcType] = getJDBCType(dt)
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. How about calling `getJDBCType(dt, Metadata.empty)` from `getJDBCType(dt: DataType)`?
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sorry, I did not understand the change you are suggesting. Do you mean calling `getJDBCType(dt, Metadata.empty)` from `getJDBCType(dt: DataType)`?
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @rama-mullapudi yes. |
||
|
|
||
| /** | ||
| * Quotes the identifier. This is used to put quotes around the identifier in case the column | ||
| * name is a reserved keyword, or in case it contains characters that require quotes (e.g. space). | ||
|
|
@@ -138,7 +146,8 @@ object JdbcDialects { | |
// Register these built-in dialects, one call per dialect, in this exact order.
Seq[JdbcDialect](
  PostgresDialect,
  DB2Dialect,
  MsSqlServerDialect,
  OracleDialect,
  NetezzaDialect
).foreach(registerDialect)
|
|
||
| /** | ||
| * Fetch the JdbcDialect class corresponding to a given database url. | ||
|
|
@@ -173,8 +182,8 @@ class AggregatedDialect(dialects: List[JdbcDialect]) extends JdbcDialect { | |
| dialects.flatMap(_.getCatalystType(sqlType, typeName, size, md)).headOption | ||
| } | ||
|
|
||
/**
 * Metadata-free lookup, kept so that callers of the single-argument API still get an
 * answer from an aggregated dialect. It runs the metadata-aware lookup with empty metadata.
 */
override def getJDBCType(dt: DataType): Option[JdbcType] =
  getJDBCType(dt, Metadata.empty)

/**
 * Asks every wrapped dialect, in list order, for a JDBC type override and returns the
 * first one that is defined, or None when no dialect provides one.
 */
override def getJDBCType(dt: DataType, md: Metadata): Option[JdbcType] = {
  dialects.flatMap(_.getJDBCType(dt, md)).headOption
}
| } | ||
|
|
||
|
|
@@ -205,7 +214,7 @@ case object PostgresDialect extends JdbcDialect { | |
| } else None | ||
| } | ||
|
|
||
| override def getJDBCType(dt: DataType): Option[JdbcType] = dt match { | ||
| override def getJDBCType(dt: DataType, md: Metadata): Option[JdbcType] = dt match { | ||
| case StringType => Some(JdbcType("TEXT", java.sql.Types.CHAR)) | ||
| case BinaryType => Some(JdbcType("BYTEA", java.sql.Types.BINARY)) | ||
| case BooleanType => Some(JdbcType("BOOLEAN", java.sql.Types.BOOLEAN)) | ||
|
|
@@ -253,10 +262,8 @@ case object MySQLDialect extends JdbcDialect { | |
| */ | ||
| @DeveloperApi | ||
| case object DB2Dialect extends JdbcDialect { | ||
|
|
||
| override def canHandle(url: String): Boolean = url.startsWith("jdbc:db2") | ||
|
|
||
| override def getJDBCType(dt: DataType): Option[JdbcType] = dt match { | ||
| override def getJDBCType(dt: DataType, md: Metadata): Option[JdbcType] = dt match { | ||
| case StringType => Some(JdbcType("CLOB", java.sql.Types.CLOB)) | ||
| case BooleanType => Some(JdbcType("CHAR(1)", java.sql.Types.CHAR)) | ||
| case _ => None | ||
|
|
@@ -278,3 +285,59 @@ case object MsSqlServerDialect extends JdbcDialect { | |
| } else None | ||
| } | ||
| } | ||
|
|
||
/**
 * :: DeveloperApi ::
 * Default Oracle dialect, mapping string/boolean on write to valid Oracle types.
 *
 * On read, the size of a VARCHAR column is stored in the column metadata under the
 * "maxlength" key. On write, a string column carrying that key is created as a VARCHAR
 * of the same length; a string column without it is created as CLOB.
 */
@DeveloperApi
case object OracleDialect extends JdbcDialect {
  override def canHandle(url: String): Boolean = url.startsWith("jdbc:oracle")

  /**
   * Maps VARCHAR columns to StringType and remembers their size in `md`.
   *
   * @param sqlType  The JDBC type code (a `java.sql.Types` constant)
   * @param typeName The type name reported for the column
   * @param size     The reported column size, saved as "maxlength"
   * @param md       Builder that receives the "maxlength" entry
   * @return Some(StringType) for VARCHAR columns, None for everything else
   */
  override def getCatalystType(
      sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = {
    // NOTE(review): the driver may report this type name as "VARCHAR2" rather than
    // "VARCHAR" -- confirm against the Oracle JDBC driver in use.
    if (sqlType == Types.VARCHAR && typeName == "VARCHAR") {
      // Save varchar size to metadata
      md.putLong("maxlength", size)
      // A VARCHAR column holds character data, so it is a string (not a long) in Catalyst.
      Some(StringType)
    } else {
      None
    }
  }

  /**
   * Chooses the column type used when writing.
   *
   * @param dt The datatype being written
   * @param md The column metadata; "maxlength" selects a sized VARCHAR for strings
   * @return The Oracle-specific JdbcType for strings and booleans, None otherwise
   */
  override def getJDBCType(dt: DataType, md: Metadata): Option[JdbcType] = dt match {
    case StringType if md.contains("maxlength") =>
      Some(JdbcType(s"VARCHAR(${md.getLong("maxlength")})", java.sql.Types.VARCHAR))
    case StringType => Some(JdbcType("CLOB", java.sql.Types.CLOB))
    case BooleanType => Some(JdbcType("CHAR(1)", java.sql.Types.CHAR))
    case _ => None
  }
}
|
|
||
/**
 * :: DeveloperApi ::
 * Default Netezza dialect, mapping string/boolean on write to valid Netezza types.
 *
 * On read, the size of a VARCHAR column is stored in the column metadata under the
 * "maxlength" key. On write, a string column carrying that key is created as a VARCHAR
 * of the same length; a string column without it is created as VARCHAR(255).
 */
@DeveloperApi
case object NetezzaDialect extends JdbcDialect {
  override def canHandle(url: String): Boolean = url.startsWith("jdbc:netezza")

  /**
   * Maps VARCHAR columns to StringType and remembers their size in `md`.
   *
   * @param sqlType  The JDBC type code (a `java.sql.Types` constant)
   * @param typeName The type name reported for the column
   * @param size     The reported column size, saved as "maxlength"
   * @param md       Builder that receives the "maxlength" entry
   * @return Some(StringType) for VARCHAR columns, None for everything else
   */
  override def getCatalystType(
      sqlType: Int, typeName: String, size: Int, md: MetadataBuilder): Option[DataType] = {
    if (sqlType == Types.VARCHAR && typeName == "VARCHAR") {
      // Save varchar size to metadata
      md.putLong("maxlength", size)
      // A VARCHAR column holds character data, so it is a string (not a long) in Catalyst.
      Some(StringType)
    } else {
      None
    }
  }

  /**
   * Chooses the column type used when writing.
   *
   * @param dt The datatype being written
   * @param md The column metadata; "maxlength" selects the VARCHAR length for strings
   * @return The Netezza-specific JdbcType for strings, binaries and booleans, None otherwise
   */
  override def getJDBCType(dt: DataType, md: Metadata): Option[JdbcType] = dt match {
    case StringType if md.contains("maxlength") =>
      Some(JdbcType(s"VARCHAR(${md.getLong("maxlength")})", java.sql.Types.VARCHAR))
    case StringType => Some(JdbcType("VARCHAR(255)", java.sql.Types.VARCHAR))
    // NOTE(review): BYTEINT is paired with java.sql.Types.BINARY here -- confirm this is
    // the intended Netezza column type for binary data.
    case BinaryType => Some(JdbcType("BYTEINT", java.sql.Types.BINARY))
    case BooleanType => Some(JdbcType("BOOLEAN", java.sql.Types.BOOLEAN))
    case _ => None
  }
}
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
While this is a `DeveloperApi`, it is public, so it would be good to fix this without breaking binary compatibility.