1717
1818package org .apache .spark .sql .hive
1919
20+ import org .apache .hadoop .hive .common .StatsSetupConst ._
21+
2022import scala .collection .JavaConverters ._
2123import scala .collection .mutable
2224
@@ -770,8 +772,6 @@ private[hive] case class MetastoreRelation
770772
771773 @ transient override lazy val statistics : Statistics = Statistics (
772774 sizeInBytes = {
773- val totalSize = hiveQlTable.getParameters.get(StatsSetupConst .TOTAL_SIZE )
774- val rawDataSize = hiveQlTable.getParameters.get(StatsSetupConst .RAW_DATA_SIZE )
775775 // TODO: check if this estimate is valid for tables after partition pruning.
776776 // NOTE: getting `totalSize` directly from params is kind of hacky, but this should be
777777 // relatively cheap if parameters for the table are populated into the metastore. An
@@ -782,10 +782,7 @@ private[hive] case class MetastoreRelation
782782 // When table is external, `totalSize` is always zero, which will influence join strategy,
783783 // so when `totalSize` is zero, use `rawDataSize` instead.
784784 // If the size is still not greater than zero, we use the default size.
785- Option (totalSize).map(_.toLong).filter(_ > 0 )
786- .getOrElse(Option (rawDataSize).map(_.toLong).filter(_ > 0 )
787- .getOrElse(Option (calculateInput().getLength).filter(_ > 0 )
788- .getOrElse(sqlContext.conf.defaultSizeInBytes))))
785+ calculateInput().filter(_ > 0 ).getOrElse(sqlContext.conf.defaultSizeInBytes))
789786 }
790787 )
791788
@@ -820,6 +817,7 @@ private[hive] case class MetastoreRelation
820817 tPartition.setDbName(databaseName)
821818 tPartition.setTableName(tableName)
822819 tPartition.setValues(p.values.asJava)
820+ tPartition.setParameters(p.properties.asJava)
823821
824822 val sd = new org.apache.hadoop.hive.metastore.api.StorageDescriptor ()
825823 tPartition.setSd(sd)
@@ -860,7 +858,19 @@ private[hive] case class MetastoreRelation
860858 }
861859 }
862860
863- private def calculateInput (): ContentSummary = {
861+ private def calculateInput (): Option [Long ] = {
862+ var partitions : Seq [Partition ] = Nil
863+ if (hiveQlTable.isPartitioned) {
864+ partitions = getHiveQlPartitions(pruningPredicates)
865+ }
866+
867+ // try with stats in table/partition properties
868+ val fromStats = getFromStats(hiveQlTable, partitions, TOTAL_SIZE ).orElse(
869+ getFromStats(hiveQlTable, partitions, RAW_DATA_SIZE ))
870+ if (fromStats.isDefined || ! sqlContext.hiveCalculateStatsRuntime) {
871+ return fromStats
872+ }
873+
864874 // create dummy mapwork
865875 val dummy : MapWork = new MapWork
866876 val alias : String = " _dummy"
@@ -870,7 +880,7 @@ private[hive] case class MetastoreRelation
870880 val pathToAliases = dummy.getPathToAliases
871881 val pathToPartition = dummy.getPathToPartitionInfo
872882 if (hiveQlTable.isPartitioned) {
873- for (partition <- getHiveQlPartitions(pruningPredicates) ) {
883+ for (partition <- partitions ) {
874884 val partPath = getDnsPath(partition.getDataLocation, sqlContext.hiveconf).toString
875885 pathToAliases.put(partPath, new util.ArrayList (util.Arrays .asList(alias)))
876886 pathToPartition.put(partPath, new PartitionDesc (partition, tableDesc))
@@ -881,7 +891,24 @@ private[hive] case class MetastoreRelation
881891 pathToPartition.put(tablePath, new PartitionDesc (tableDesc, null ))
882892 }
883893 // calculate summary
884- Utilities .getInputSummary(new Context (sqlContext.hiveconf), dummy, null )
894+ Some (Utilities .getInputSummary(new Context (sqlContext.hiveconf), dummy, null ).getLength)
895+ }
896+
/**
 * Reads the size statistic stored under `statKey` from metastore parameters.
 *
 * For a partitioned table the value is the sum over `partitions`; if any partition lacks a
 * usable value the whole result is `None`, because a partial sum would under-estimate the
 * input. An empty partition list yields `Some(0)`. For a non-partitioned table the value is
 * read from the table's own parameters.
 *
 * A value is usable only when it is present, parses as a `Long`, and is greater than zero.
 * Malformed values are treated as missing instead of throwing, so the caller can fall back
 * to another size source.
 *
 * @param table      table whose statistics are requested
 * @param partitions partitions to sum over; only consulted when `table` is partitioned
 * @param statKey    parameter key of the statistic to read
 * @return the positive statistic (or sum of per-partition statistics), or `None` if unavailable
 */
private def getFromStats(table: Table, partitions: Seq[Partition], statKey: String):
  Option[Long] = {
  // Null map, missing key, non-numeric text and non-positive numbers all count as "no stat".
  def positiveStat(params: java.util.Map[String, String]): Option[Long] =
    Option(params)
      .flatMap(m => Option(m.get(statKey)))
      .flatMap(raw => scala.util.Try(raw.toLong).toOption)
      .filter(_ > 0)

  if (table.isPartitioned) {
    // Once the accumulator becomes None it stays None, so one missing stat voids the sum.
    val zero: Option[Long] = Some(0L)
    partitions.foldLeft(zero) { (acc, partition) =>
      acc.flatMap(sum => positiveStat(partition.getParameters).map(sum + _))
    }
  } else {
    positiveStat(table.getParameters)
  }
}
886913
887914 private [this ] def castFromString (value : String , dataType : DataType ) = {
0 commit comments