
Commit a6f6e07

eason-yuchen-liu and WweiL authored and committed
[SPARK-48939][AVRO] Support reading Avro with recursive schema reference
Continuing the discussion from #47425 in this PR because I can't push to Yuchen's account.

### What changes were proposed in this pull request?

The built-in Protobuf connector already supports recursive schema references. It does so by letting users specify an option, "recursive.fields.max.depth", and unrolling the recursive fields to that depth at the start of execution. This turns a problem of a dynamic per-row schema into a fixed schema, which Spark supports. Avro can adopt a similar method. This PR adds an option "recursiveFieldMaxDepth" to both the Avro data source and the from_avro function. With this option, Spark can read Avro data with recursive schemas up to a certain depth.

### Why are the changes needed?

A recursive reference denotes the case where the type of a field has already been defined in one of its parent nodes. A simple example is:

```
{
  "type": "record",
  "name": "LongList",
  "fields" : [
    {"name": "value", "type": "long"},
    {"name": "next", "type": ["null", "LongList"]}
  ]
}
```

This is written in the Avro schema DSL and represents a linked-list data structure. Spark currently throws an error on this schema. Many users use schemas like this, so we should support it.

### Does this PR introduce any user-facing change?

Yes. Previously, Spark threw an error on recursive schemas like the one above. With this change, it still throws the same error by default, but when users set the option to a number greater than 0, the schema is unrolled to that depth.

### How was this patch tested?

Added new unit tests and integration tests to AvroSuite and AvroFunctionSuite.

### Was this patch authored or co-authored using generative AI tooling?

No.

Co-authored-by: Wei Liu <wei.liu@databricks.com>

Closes #48043 from WweiL/yuchen-avro-recursive-schema.

Lead-authored-by: Yuchen Liu <[email protected]>
Co-authored-by: Wei Liu <[email protected]>
Co-authored-by: Yuchen Liu <[email protected]>
Signed-off-by: Gengliang Wang <[email protected]>
1 parent fbf81eb commit a6f6e07
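
Usage sketch (not part of this commit's diff): a minimal example of reading Avro files that contain the recursive LongList record above. The SparkSession `spark`, the input path, and the chosen depth value are assumptions for illustration only.

```scala
// Hedged sketch: assumes a running SparkSession `spark` and Avro files at a
// hypothetical path whose schema is the recursive LongList record shown above.
val df = spark.read
  .format("avro")
  .option("recursiveFieldMaxDepth", "3") // unroll recursion to depth 3; values above 15 are rejected
  .load("/tmp/long_list_avro")           // hypothetical input path
df.printSchema() // `next` appears as a nested struct, truncated at the configured depth
```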

File tree

15 files changed: +488 -87 lines changed


common/utils/src/main/scala/org/apache/spark/internal/LogKey.scala

Lines changed: 2 additions & 0 deletions
@@ -266,6 +266,7 @@ private[spark] object LogKeys {
   case object FEATURE_NAME extends LogKey
   case object FETCH_SIZE extends LogKey
   case object FIELD_NAME extends LogKey
+  case object FIELD_TYPE extends LogKey
   case object FILES extends LogKey
   case object FILE_ABSOLUTE_PATH extends LogKey
   case object FILE_END_OFFSET extends LogKey
@@ -652,6 +653,7 @@ private[spark] object LogKeys {
   case object RECEIVER_IDS extends LogKey
   case object RECORDS extends LogKey
   case object RECOVERY_STATE extends LogKey
+  case object RECURSIVE_DEPTH extends LogKey
   case object REDACTED_STATEMENT extends LogKey
   case object REDUCE_ID extends LogKey
   case object REGEX extends LogKey

connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDataToCatalyst.scala

Lines changed: 4 additions & 2 deletions
@@ -42,7 +42,8 @@ private[sql] case class AvroDataToCatalyst(
     val dt = SchemaConverters.toSqlType(
       expectedSchema,
       avroOptions.useStableIdForUnionType,
-      avroOptions.stableIdPrefixForUnionType).dataType
+      avroOptions.stableIdPrefixForUnionType,
+      avroOptions.recursiveFieldMaxDepth).dataType
     parseMode match {
       // With PermissiveMode, the output Catalyst row might contain columns of null values for
       // corrupt records, even if some of the columns are not nullable in the user-provided schema.
@@ -69,7 +70,8 @@ private[sql] case class AvroDataToCatalyst(
       dataType,
       avroOptions.datetimeRebaseModeInRead,
       avroOptions.useStableIdForUnionType,
-      avroOptions.stableIdPrefixForUnionType)
+      avroOptions.stableIdPrefixForUnionType,
+      avroOptions.recursiveFieldMaxDepth)

   @transient private var decoder: BinaryDecoder = _

connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroDeserializer.scala

Lines changed: 8 additions & 4 deletions
@@ -51,22 +51,25 @@ private[sql] class AvroDeserializer(
     datetimeRebaseSpec: RebaseSpec,
     filters: StructFilters,
     useStableIdForUnionType: Boolean,
-    stableIdPrefixForUnionType: String) {
+    stableIdPrefixForUnionType: String,
+    recursiveFieldMaxDepth: Int) {

   def this(
       rootAvroType: Schema,
       rootCatalystType: DataType,
       datetimeRebaseMode: String,
       useStableIdForUnionType: Boolean,
-      stableIdPrefixForUnionType: String) = {
+      stableIdPrefixForUnionType: String,
+      recursiveFieldMaxDepth: Int) = {
     this(
       rootAvroType,
       rootCatalystType,
       positionalFieldMatch = false,
       RebaseSpec(LegacyBehaviorPolicy.withName(datetimeRebaseMode)),
       new NoopFilters,
       useStableIdForUnionType,
-      stableIdPrefixForUnionType)
+      stableIdPrefixForUnionType,
+      recursiveFieldMaxDepth)
   }

   private lazy val decimalConversions = new DecimalConversion()
@@ -128,7 +131,8 @@ private[sql] class AvroDeserializer(
       s"schema is incompatible (avroType = $avroType, sqlType = ${catalystType.sql})"

     val realDataType = SchemaConverters.toSqlType(
-      avroType, useStableIdForUnionType, stableIdPrefixForUnionType).dataType
+      avroType, useStableIdForUnionType, stableIdPrefixForUnionType,
+      recursiveFieldMaxDepth).dataType

     (avroType.getType, catalystType) match {
       case (NULL, NullType) => (updater, ordinal, _) =>

connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroFileFormat.scala

Lines changed: 2 additions & 1 deletion
@@ -145,7 +145,8 @@ private[sql] class AvroFileFormat extends FileFormat
             datetimeRebaseMode,
             avroFilters,
             parsedOptions.useStableIdForUnionType,
-            parsedOptions.stableIdPrefixForUnionType)
+            parsedOptions.stableIdPrefixForUnionType,
+            parsedOptions.recursiveFieldMaxDepth)
           override val stopPosition = file.start + file.length

           override def hasNext: Boolean = hasNextRow

connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroOptions.scala

Lines changed: 31 additions & 0 deletions
@@ -27,6 +27,7 @@ import org.apache.spark.internal.Logging
 import org.apache.spark.sql.SparkSession
 import org.apache.spark.sql.catalyst.{DataSourceOptions, FileSourceOptions}
 import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap, FailFastMode, ParseMode}
+import org.apache.spark.sql.errors.QueryCompilationErrors
 import org.apache.spark.sql.internal.SQLConf

 /**
@@ -136,6 +137,15 @@ private[sql] class AvroOptions(

   val stableIdPrefixForUnionType: String = parameters
     .getOrElse(STABLE_ID_PREFIX_FOR_UNION_TYPE, "member_")
+
+  val recursiveFieldMaxDepth: Int =
+    parameters.get(RECURSIVE_FIELD_MAX_DEPTH).map(_.toInt).getOrElse(-1)
+
+  if (recursiveFieldMaxDepth > RECURSIVE_FIELD_MAX_DEPTH_LIMIT) {
+    throw QueryCompilationErrors.avroOptionsException(
+      RECURSIVE_FIELD_MAX_DEPTH,
+      s"Should not be greater than $RECURSIVE_FIELD_MAX_DEPTH_LIMIT.")
+  }
 }

 private[sql] object AvroOptions extends DataSourceOptions {
@@ -170,4 +180,25 @@ private[sql] object AvroOptions extends DataSourceOptions {
   // When STABLE_ID_FOR_UNION_TYPE is enabled, the option allows to configure the prefix for fields
   // of Avro Union type.
   val STABLE_ID_PREFIX_FOR_UNION_TYPE = newOption("stableIdentifierPrefixForUnionType")
+
+  /**
+   * Adds support for recursive fields. If this option is not specified or is set to 0, recursive
+   * fields are not permitted. Setting it to 1 drops all recursive fields, 2 allows recursive
+   * fields to be recursed once, and 3 allows it to be recursed twice and so on, up to 15.
+   * Values larger than 15 are not allowed in order to avoid inadvertently creating very large
+   * schemas. If an avro message has depth beyond this limit, the Spark struct returned is
+   * truncated after the recursion limit.
+   *
+   * Examples: Consider an Avro schema with a recursive field:
+   * {"type" : "record", "name" : "Node", "fields" : [{"name": "Id", "type": "int"},
+   * {"name": "Next", "type": ["null", "Node"]}]}
+   * The following lists the parsed schema with different values for this setting.
+   * 1: `struct<Id: int>`
+   * 2: `struct<Id: int, Next: struct<Id: int>>`
+   * 3: `struct<Id: int, Next: struct<Id: int, Next: struct<Id: int>>>`
+   * and so on.
+   */
+  val RECURSIVE_FIELD_MAX_DEPTH = newOption("recursiveFieldMaxDepth")
+
+  val RECURSIVE_FIELD_MAX_DEPTH_LIMIT: Int = 15
 }
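
To make the doc comment above concrete, here is a hedged sketch of converting the recursive "Node" schema with the new fourth argument of SchemaConverters.toSqlType. It assumes the four-argument overload visible at the call sites in this commit is accessible from user code; the expected result is taken from the examples in the comment, not from the PR's tests.

```scala
import org.apache.avro.Schema
import org.apache.spark.sql.avro.SchemaConverters

// The recursive "Node" schema from the doc comment above.
val nodeSchema = new Schema.Parser().parse(
  """{"type": "record", "name": "Node", "fields": [
    |  {"name": "Id", "type": "int"},
    |  {"name": "Next", "type": ["null", "Node"]}
    |]}""".stripMargin)

// Positional arguments mirror the call sites in this commit:
// (schema, useStableIdForUnionType, stableIdPrefixForUnionType, recursiveFieldMaxDepth).
// With a depth of 3, the doc comment says the result is
// struct<Id: int, Next: struct<Id: int, Next: struct<Id: int>>>.
val sqlType = SchemaConverters.toSqlType(nodeSchema, false, "member_", 3).dataType
println(sqlType.simpleString)
```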

connector/avro/src/main/scala/org/apache/spark/sql/avro/AvroUtils.scala

Lines changed: 2 additions & 1 deletion
@@ -65,7 +65,8 @@ private[sql] object AvroUtils extends Logging {
     SchemaConverters.toSqlType(
       avroSchema,
       parsedOptions.useStableIdForUnionType,
-      parsedOptions.stableIdPrefixForUnionType).dataType match {
+      parsedOptions.stableIdPrefixForUnionType,
+      parsedOptions.recursiveFieldMaxDepth).dataType match {
       case t: StructType => Some(t)
       case _ => throw new RuntimeException(
         s"""Avro schema cannot be converted to a Spark SQL StructType:
