From 6f575a05880158c2ed32bdb1dfd6627fecfd635e Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Fri, 12 Aug 2016 12:02:31 -0700 Subject: [PATCH 01/27] first attempt. --- .../scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 069a9b665eb36..ff0280ff35b62 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -27,7 +27,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.util.{fileToString, stringToFile} import org.apache.spark.sql.test.SharedSQLContext -import org.apache.spark.sql.types.StructType +import org.apache.spark.sql.types.{IntegerType, StringType, StructType} /** * End-to-end test cases for SQL queries. @@ -229,6 +229,10 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { files ++ dirs.flatMap(listFilesRecursively) } + private def getFilePath(path: String): String = { + Thread.currentThread().getContextClassLoader.getResource(path).toString + } + /** Load built-in test tables into the SparkSession. 
*/ private def loadTestData(session: SparkSession): Unit = { import session.implicits._ @@ -246,6 +250,10 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { Tuple1(Map(1 -> "a5")) :: Nil) .toDF("mapcol") .createOrReplaceTempView("mapdata") + + val srcSchema = new StructType().add("key", IntegerType).add("value", StringType) + session.read.schema(srcSchema).json(getFilePath("test-data/kv1.json")) + .createOrReplaceTempView("src") } private val originalTimeZone = TimeZone.getDefault From 9888f8a505ba679b66086fad712a084483a40826 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Fri, 12 Aug 2016 12:04:05 -0700 Subject: [PATCH 02/27] add files --- .../resources/sql-tests/inputs/auto_join0.sql | 9 + .../sql-tests/results/auto_join0.sql.out | 18 + .../src/test/resources/test-data/kv1.json | 500 ++++++++++++++++++ 3 files changed, 527 insertions(+) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/auto_join0.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/auto_join0.sql.out create mode 100644 sql/core/src/test/resources/test-data/kv1.json diff --git a/sql/core/src/test/resources/sql-tests/inputs/auto_join0.sql b/sql/core/src/test/resources/sql-tests/inputs/auto_join0.sql new file mode 100644 index 0000000000000..46fc0a0875220 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/auto_join0.sql @@ -0,0 +1,9 @@ +select sum(hash(a.k1,a.v1,a.k2, a.v2)) +from ( +SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 10) src1 + JOIN + (SELECT * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +) a; diff --git a/sql/core/src/test/resources/sql-tests/results/auto_join0.sql.out b/sql/core/src/test/resources/sql-tests/results/auto_join0.sql.out new file mode 100644 index 0000000000000..8a15a480d6fc2 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/auto_join0.sql.out @@ -0,0 +1,18 @@ +-- Automatically generated by SQLQueryTestSuite 
+-- Number of queries: 1 + + +-- !query 0 +select sum(hash(a.k1,a.v1,a.k2, a.v2)) +from ( +SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 10) src1 + JOIN + (SELECT * FROM src WHERE src.key < 10) src2 + SORT BY k1, v1, k2, v2 +) a +-- !query 0 schema +struct +-- !query 0 output +42294652308 diff --git a/sql/core/src/test/resources/test-data/kv1.json b/sql/core/src/test/resources/test-data/kv1.json new file mode 100644 index 0000000000000..5981db6e17b6f --- /dev/null +++ b/sql/core/src/test/resources/test-data/kv1.json @@ -0,0 +1,500 @@ +{"key":238,"value":"val_238"} +{"key":86,"value":"val_86"} +{"key":311,"value":"val_311"} +{"key":27,"value":"val_27"} +{"key":165,"value":"val_165"} +{"key":409,"value":"val_409"} +{"key":255,"value":"val_255"} +{"key":278,"value":"val_278"} +{"key":98,"value":"val_98"} +{"key":484,"value":"val_484"} +{"key":265,"value":"val_265"} +{"key":193,"value":"val_193"} +{"key":401,"value":"val_401"} +{"key":150,"value":"val_150"} +{"key":273,"value":"val_273"} +{"key":224,"value":"val_224"} +{"key":369,"value":"val_369"} +{"key":66,"value":"val_66"} +{"key":128,"value":"val_128"} +{"key":213,"value":"val_213"} +{"key":146,"value":"val_146"} +{"key":406,"value":"val_406"} +{"key":429,"value":"val_429"} +{"key":374,"value":"val_374"} +{"key":152,"value":"val_152"} +{"key":469,"value":"val_469"} +{"key":145,"value":"val_145"} +{"key":495,"value":"val_495"} +{"key":37,"value":"val_37"} +{"key":327,"value":"val_327"} +{"key":281,"value":"val_281"} +{"key":277,"value":"val_277"} +{"key":209,"value":"val_209"} +{"key":15,"value":"val_15"} +{"key":82,"value":"val_82"} +{"key":403,"value":"val_403"} +{"key":166,"value":"val_166"} +{"key":417,"value":"val_417"} +{"key":430,"value":"val_430"} +{"key":252,"value":"val_252"} +{"key":292,"value":"val_292"} +{"key":219,"value":"val_219"} +{"key":287,"value":"val_287"} +{"key":153,"value":"val_153"} +{"key":193,"value":"val_193"} 
+{"key":338,"value":"val_338"} +{"key":446,"value":"val_446"} +{"key":459,"value":"val_459"} +{"key":394,"value":"val_394"} +{"key":237,"value":"val_237"} +{"key":482,"value":"val_482"} +{"key":174,"value":"val_174"} +{"key":413,"value":"val_413"} +{"key":494,"value":"val_494"} +{"key":207,"value":"val_207"} +{"key":199,"value":"val_199"} +{"key":466,"value":"val_466"} +{"key":208,"value":"val_208"} +{"key":174,"value":"val_174"} +{"key":399,"value":"val_399"} +{"key":396,"value":"val_396"} +{"key":247,"value":"val_247"} +{"key":417,"value":"val_417"} +{"key":489,"value":"val_489"} +{"key":162,"value":"val_162"} +{"key":377,"value":"val_377"} +{"key":397,"value":"val_397"} +{"key":309,"value":"val_309"} +{"key":365,"value":"val_365"} +{"key":266,"value":"val_266"} +{"key":439,"value":"val_439"} +{"key":342,"value":"val_342"} +{"key":367,"value":"val_367"} +{"key":325,"value":"val_325"} +{"key":167,"value":"val_167"} +{"key":195,"value":"val_195"} +{"key":475,"value":"val_475"} +{"key":17,"value":"val_17"} +{"key":113,"value":"val_113"} +{"key":155,"value":"val_155"} +{"key":203,"value":"val_203"} +{"key":339,"value":"val_339"} +{"key":0,"value":"val_0"} +{"key":455,"value":"val_455"} +{"key":128,"value":"val_128"} +{"key":311,"value":"val_311"} +{"key":316,"value":"val_316"} +{"key":57,"value":"val_57"} +{"key":302,"value":"val_302"} +{"key":205,"value":"val_205"} +{"key":149,"value":"val_149"} +{"key":438,"value":"val_438"} +{"key":345,"value":"val_345"} +{"key":129,"value":"val_129"} +{"key":170,"value":"val_170"} +{"key":20,"value":"val_20"} +{"key":489,"value":"val_489"} +{"key":157,"value":"val_157"} +{"key":378,"value":"val_378"} +{"key":221,"value":"val_221"} +{"key":92,"value":"val_92"} +{"key":111,"value":"val_111"} +{"key":47,"value":"val_47"} +{"key":72,"value":"val_72"} +{"key":4,"value":"val_4"} +{"key":280,"value":"val_280"} +{"key":35,"value":"val_35"} +{"key":427,"value":"val_427"} +{"key":277,"value":"val_277"} +{"key":208,"value":"val_208"} 
+{"key":356,"value":"val_356"} +{"key":399,"value":"val_399"} +{"key":169,"value":"val_169"} +{"key":382,"value":"val_382"} +{"key":498,"value":"val_498"} +{"key":125,"value":"val_125"} +{"key":386,"value":"val_386"} +{"key":437,"value":"val_437"} +{"key":469,"value":"val_469"} +{"key":192,"value":"val_192"} +{"key":286,"value":"val_286"} +{"key":187,"value":"val_187"} +{"key":176,"value":"val_176"} +{"key":54,"value":"val_54"} +{"key":459,"value":"val_459"} +{"key":51,"value":"val_51"} +{"key":138,"value":"val_138"} +{"key":103,"value":"val_103"} +{"key":239,"value":"val_239"} +{"key":213,"value":"val_213"} +{"key":216,"value":"val_216"} +{"key":430,"value":"val_430"} +{"key":278,"value":"val_278"} +{"key":176,"value":"val_176"} +{"key":289,"value":"val_289"} +{"key":221,"value":"val_221"} +{"key":65,"value":"val_65"} +{"key":318,"value":"val_318"} +{"key":332,"value":"val_332"} +{"key":311,"value":"val_311"} +{"key":275,"value":"val_275"} +{"key":137,"value":"val_137"} +{"key":241,"value":"val_241"} +{"key":83,"value":"val_83"} +{"key":333,"value":"val_333"} +{"key":180,"value":"val_180"} +{"key":284,"value":"val_284"} +{"key":12,"value":"val_12"} +{"key":230,"value":"val_230"} +{"key":181,"value":"val_181"} +{"key":67,"value":"val_67"} +{"key":260,"value":"val_260"} +{"key":404,"value":"val_404"} +{"key":384,"value":"val_384"} +{"key":489,"value":"val_489"} +{"key":353,"value":"val_353"} +{"key":373,"value":"val_373"} +{"key":272,"value":"val_272"} +{"key":138,"value":"val_138"} +{"key":217,"value":"val_217"} +{"key":84,"value":"val_84"} +{"key":348,"value":"val_348"} +{"key":466,"value":"val_466"} +{"key":58,"value":"val_58"} +{"key":8,"value":"val_8"} +{"key":411,"value":"val_411"} +{"key":230,"value":"val_230"} +{"key":208,"value":"val_208"} +{"key":348,"value":"val_348"} +{"key":24,"value":"val_24"} +{"key":463,"value":"val_463"} +{"key":431,"value":"val_431"} +{"key":179,"value":"val_179"} +{"key":172,"value":"val_172"} +{"key":42,"value":"val_42"} 
+{"key":129,"value":"val_129"} +{"key":158,"value":"val_158"} +{"key":119,"value":"val_119"} +{"key":496,"value":"val_496"} +{"key":0,"value":"val_0"} +{"key":322,"value":"val_322"} +{"key":197,"value":"val_197"} +{"key":468,"value":"val_468"} +{"key":393,"value":"val_393"} +{"key":454,"value":"val_454"} +{"key":100,"value":"val_100"} +{"key":298,"value":"val_298"} +{"key":199,"value":"val_199"} +{"key":191,"value":"val_191"} +{"key":418,"value":"val_418"} +{"key":96,"value":"val_96"} +{"key":26,"value":"val_26"} +{"key":165,"value":"val_165"} +{"key":327,"value":"val_327"} +{"key":230,"value":"val_230"} +{"key":205,"value":"val_205"} +{"key":120,"value":"val_120"} +{"key":131,"value":"val_131"} +{"key":51,"value":"val_51"} +{"key":404,"value":"val_404"} +{"key":43,"value":"val_43"} +{"key":436,"value":"val_436"} +{"key":156,"value":"val_156"} +{"key":469,"value":"val_469"} +{"key":468,"value":"val_468"} +{"key":308,"value":"val_308"} +{"key":95,"value":"val_95"} +{"key":196,"value":"val_196"} +{"key":288,"value":"val_288"} +{"key":481,"value":"val_481"} +{"key":457,"value":"val_457"} +{"key":98,"value":"val_98"} +{"key":282,"value":"val_282"} +{"key":197,"value":"val_197"} +{"key":187,"value":"val_187"} +{"key":318,"value":"val_318"} +{"key":318,"value":"val_318"} +{"key":409,"value":"val_409"} +{"key":470,"value":"val_470"} +{"key":137,"value":"val_137"} +{"key":369,"value":"val_369"} +{"key":316,"value":"val_316"} +{"key":169,"value":"val_169"} +{"key":413,"value":"val_413"} +{"key":85,"value":"val_85"} +{"key":77,"value":"val_77"} +{"key":0,"value":"val_0"} +{"key":490,"value":"val_490"} +{"key":87,"value":"val_87"} +{"key":364,"value":"val_364"} +{"key":179,"value":"val_179"} +{"key":118,"value":"val_118"} +{"key":134,"value":"val_134"} +{"key":395,"value":"val_395"} +{"key":282,"value":"val_282"} +{"key":138,"value":"val_138"} +{"key":238,"value":"val_238"} +{"key":419,"value":"val_419"} +{"key":15,"value":"val_15"} +{"key":118,"value":"val_118"} 
+{"key":72,"value":"val_72"} +{"key":90,"value":"val_90"} +{"key":307,"value":"val_307"} +{"key":19,"value":"val_19"} +{"key":435,"value":"val_435"} +{"key":10,"value":"val_10"} +{"key":277,"value":"val_277"} +{"key":273,"value":"val_273"} +{"key":306,"value":"val_306"} +{"key":224,"value":"val_224"} +{"key":309,"value":"val_309"} +{"key":389,"value":"val_389"} +{"key":327,"value":"val_327"} +{"key":242,"value":"val_242"} +{"key":369,"value":"val_369"} +{"key":392,"value":"val_392"} +{"key":272,"value":"val_272"} +{"key":331,"value":"val_331"} +{"key":401,"value":"val_401"} +{"key":242,"value":"val_242"} +{"key":452,"value":"val_452"} +{"key":177,"value":"val_177"} +{"key":226,"value":"val_226"} +{"key":5,"value":"val_5"} +{"key":497,"value":"val_497"} +{"key":402,"value":"val_402"} +{"key":396,"value":"val_396"} +{"key":317,"value":"val_317"} +{"key":395,"value":"val_395"} +{"key":58,"value":"val_58"} +{"key":35,"value":"val_35"} +{"key":336,"value":"val_336"} +{"key":95,"value":"val_95"} +{"key":11,"value":"val_11"} +{"key":168,"value":"val_168"} +{"key":34,"value":"val_34"} +{"key":229,"value":"val_229"} +{"key":233,"value":"val_233"} +{"key":143,"value":"val_143"} +{"key":472,"value":"val_472"} +{"key":322,"value":"val_322"} +{"key":498,"value":"val_498"} +{"key":160,"value":"val_160"} +{"key":195,"value":"val_195"} +{"key":42,"value":"val_42"} +{"key":321,"value":"val_321"} +{"key":430,"value":"val_430"} +{"key":119,"value":"val_119"} +{"key":489,"value":"val_489"} +{"key":458,"value":"val_458"} +{"key":78,"value":"val_78"} +{"key":76,"value":"val_76"} +{"key":41,"value":"val_41"} +{"key":223,"value":"val_223"} +{"key":492,"value":"val_492"} +{"key":149,"value":"val_149"} +{"key":449,"value":"val_449"} +{"key":218,"value":"val_218"} +{"key":228,"value":"val_228"} +{"key":138,"value":"val_138"} +{"key":453,"value":"val_453"} +{"key":30,"value":"val_30"} +{"key":209,"value":"val_209"} +{"key":64,"value":"val_64"} +{"key":468,"value":"val_468"} 
+{"key":76,"value":"val_76"} +{"key":74,"value":"val_74"} +{"key":342,"value":"val_342"} +{"key":69,"value":"val_69"} +{"key":230,"value":"val_230"} +{"key":33,"value":"val_33"} +{"key":368,"value":"val_368"} +{"key":103,"value":"val_103"} +{"key":296,"value":"val_296"} +{"key":113,"value":"val_113"} +{"key":216,"value":"val_216"} +{"key":367,"value":"val_367"} +{"key":344,"value":"val_344"} +{"key":167,"value":"val_167"} +{"key":274,"value":"val_274"} +{"key":219,"value":"val_219"} +{"key":239,"value":"val_239"} +{"key":485,"value":"val_485"} +{"key":116,"value":"val_116"} +{"key":223,"value":"val_223"} +{"key":256,"value":"val_256"} +{"key":263,"value":"val_263"} +{"key":70,"value":"val_70"} +{"key":487,"value":"val_487"} +{"key":480,"value":"val_480"} +{"key":401,"value":"val_401"} +{"key":288,"value":"val_288"} +{"key":191,"value":"val_191"} +{"key":5,"value":"val_5"} +{"key":244,"value":"val_244"} +{"key":438,"value":"val_438"} +{"key":128,"value":"val_128"} +{"key":467,"value":"val_467"} +{"key":432,"value":"val_432"} +{"key":202,"value":"val_202"} +{"key":316,"value":"val_316"} +{"key":229,"value":"val_229"} +{"key":469,"value":"val_469"} +{"key":463,"value":"val_463"} +{"key":280,"value":"val_280"} +{"key":2,"value":"val_2"} +{"key":35,"value":"val_35"} +{"key":283,"value":"val_283"} +{"key":331,"value":"val_331"} +{"key":235,"value":"val_235"} +{"key":80,"value":"val_80"} +{"key":44,"value":"val_44"} +{"key":193,"value":"val_193"} +{"key":321,"value":"val_321"} +{"key":335,"value":"val_335"} +{"key":104,"value":"val_104"} +{"key":466,"value":"val_466"} +{"key":366,"value":"val_366"} +{"key":175,"value":"val_175"} +{"key":403,"value":"val_403"} +{"key":483,"value":"val_483"} +{"key":53,"value":"val_53"} +{"key":105,"value":"val_105"} +{"key":257,"value":"val_257"} +{"key":406,"value":"val_406"} +{"key":409,"value":"val_409"} +{"key":190,"value":"val_190"} +{"key":406,"value":"val_406"} +{"key":401,"value":"val_401"} +{"key":114,"value":"val_114"} 
+{"key":258,"value":"val_258"} +{"key":90,"value":"val_90"} +{"key":203,"value":"val_203"} +{"key":262,"value":"val_262"} +{"key":348,"value":"val_348"} +{"key":424,"value":"val_424"} +{"key":12,"value":"val_12"} +{"key":396,"value":"val_396"} +{"key":201,"value":"val_201"} +{"key":217,"value":"val_217"} +{"key":164,"value":"val_164"} +{"key":431,"value":"val_431"} +{"key":454,"value":"val_454"} +{"key":478,"value":"val_478"} +{"key":298,"value":"val_298"} +{"key":125,"value":"val_125"} +{"key":431,"value":"val_431"} +{"key":164,"value":"val_164"} +{"key":424,"value":"val_424"} +{"key":187,"value":"val_187"} +{"key":382,"value":"val_382"} +{"key":5,"value":"val_5"} +{"key":70,"value":"val_70"} +{"key":397,"value":"val_397"} +{"key":480,"value":"val_480"} +{"key":291,"value":"val_291"} +{"key":24,"value":"val_24"} +{"key":351,"value":"val_351"} +{"key":255,"value":"val_255"} +{"key":104,"value":"val_104"} +{"key":70,"value":"val_70"} +{"key":163,"value":"val_163"} +{"key":438,"value":"val_438"} +{"key":119,"value":"val_119"} +{"key":414,"value":"val_414"} +{"key":200,"value":"val_200"} +{"key":491,"value":"val_491"} +{"key":237,"value":"val_237"} +{"key":439,"value":"val_439"} +{"key":360,"value":"val_360"} +{"key":248,"value":"val_248"} +{"key":479,"value":"val_479"} +{"key":305,"value":"val_305"} +{"key":417,"value":"val_417"} +{"key":199,"value":"val_199"} +{"key":444,"value":"val_444"} +{"key":120,"value":"val_120"} +{"key":429,"value":"val_429"} +{"key":169,"value":"val_169"} +{"key":443,"value":"val_443"} +{"key":323,"value":"val_323"} +{"key":325,"value":"val_325"} +{"key":277,"value":"val_277"} +{"key":230,"value":"val_230"} +{"key":478,"value":"val_478"} +{"key":178,"value":"val_178"} +{"key":468,"value":"val_468"} +{"key":310,"value":"val_310"} +{"key":317,"value":"val_317"} +{"key":333,"value":"val_333"} +{"key":493,"value":"val_493"} +{"key":460,"value":"val_460"} +{"key":207,"value":"val_207"} +{"key":249,"value":"val_249"} 
+{"key":265,"value":"val_265"} +{"key":480,"value":"val_480"} +{"key":83,"value":"val_83"} +{"key":136,"value":"val_136"} +{"key":353,"value":"val_353"} +{"key":172,"value":"val_172"} +{"key":214,"value":"val_214"} +{"key":462,"value":"val_462"} +{"key":233,"value":"val_233"} +{"key":406,"value":"val_406"} +{"key":133,"value":"val_133"} +{"key":175,"value":"val_175"} +{"key":189,"value":"val_189"} +{"key":454,"value":"val_454"} +{"key":375,"value":"val_375"} +{"key":401,"value":"val_401"} +{"key":421,"value":"val_421"} +{"key":407,"value":"val_407"} +{"key":384,"value":"val_384"} +{"key":256,"value":"val_256"} +{"key":26,"value":"val_26"} +{"key":134,"value":"val_134"} +{"key":67,"value":"val_67"} +{"key":384,"value":"val_384"} +{"key":379,"value":"val_379"} +{"key":18,"value":"val_18"} +{"key":462,"value":"val_462"} +{"key":492,"value":"val_492"} +{"key":100,"value":"val_100"} +{"key":298,"value":"val_298"} +{"key":9,"value":"val_9"} +{"key":341,"value":"val_341"} +{"key":498,"value":"val_498"} +{"key":146,"value":"val_146"} +{"key":458,"value":"val_458"} +{"key":362,"value":"val_362"} +{"key":186,"value":"val_186"} +{"key":285,"value":"val_285"} +{"key":348,"value":"val_348"} +{"key":167,"value":"val_167"} +{"key":18,"value":"val_18"} +{"key":273,"value":"val_273"} +{"key":183,"value":"val_183"} +{"key":281,"value":"val_281"} +{"key":344,"value":"val_344"} +{"key":97,"value":"val_97"} +{"key":469,"value":"val_469"} +{"key":315,"value":"val_315"} +{"key":84,"value":"val_84"} +{"key":28,"value":"val_28"} +{"key":37,"value":"val_37"} +{"key":448,"value":"val_448"} +{"key":152,"value":"val_152"} +{"key":348,"value":"val_348"} +{"key":307,"value":"val_307"} +{"key":194,"value":"val_194"} +{"key":414,"value":"val_414"} +{"key":477,"value":"val_477"} +{"key":222,"value":"val_222"} +{"key":126,"value":"val_126"} +{"key":90,"value":"val_90"} +{"key":169,"value":"val_169"} +{"key":403,"value":"val_403"} +{"key":400,"value":"val_400"} +{"key":200,"value":"val_200"} 
+{"key":97,"value":"val_97"} From 239191c9fe8cd69740f6f97fbe7041761f00ec7e Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Fri, 12 Aug 2016 23:53:21 -0700 Subject: [PATCH 03/27] batch 1 --- .../resources/sql-tests/inputs/auto_join0.sql | 9 - .../test/resources/sql-tests/inputs/join.sql | 107 ++++ .../sql-tests/results/auto_join0.sql.out | 18 - .../resources/sql-tests/results/join.sql.out | 189 +++++++ .../src/test/resources/test-data/kv1.json | 499 +----------------- .../apache/spark/sql/SQLQueryTestSuite.scala | 6 + 6 files changed, 304 insertions(+), 524 deletions(-) delete mode 100644 sql/core/src/test/resources/sql-tests/inputs/auto_join0.sql create mode 100644 sql/core/src/test/resources/sql-tests/inputs/join.sql delete mode 100644 sql/core/src/test/resources/sql-tests/results/auto_join0.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/results/join.sql.out diff --git a/sql/core/src/test/resources/sql-tests/inputs/auto_join0.sql b/sql/core/src/test/resources/sql-tests/inputs/auto_join0.sql deleted file mode 100644 index 46fc0a0875220..0000000000000 --- a/sql/core/src/test/resources/sql-tests/inputs/auto_join0.sql +++ /dev/null @@ -1,9 +0,0 @@ -select sum(hash(a.k1,a.v1,a.k2, a.v2)) -from ( -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -) a; diff --git a/sql/core/src/test/resources/sql-tests/inputs/join.sql b/sql/core/src/test/resources/sql-tests/inputs/join.sql new file mode 100644 index 0000000000000..d6ab586c87fa6 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/join.sql @@ -0,0 +1,107 @@ +-- self-join (auto_join0.q) +SELECT a.k1, a.v1, a.k2, a.v2 +FROM ( +SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 200) src1 + JOIN + (SELECT * FROM src WHERE src.key < 200) src2 + SORT BY k1, v1, k2, v2 +) a; + +-- 
self-join (auto_join1.q) +SELECT src1.key, src2.value +FROM src src1 JOIN src src2 ON (src1.key = src2.key); + +-- self-join (auto_join2.q) +SELECT src1.key, src3.value +FROM src src1 JOIN src src2 ON (src1.key = src2.key) + JOIN src src3 ON (src1.key + src2.key = src3.key); + +-- self-join (auto_join3.q) +SELECT src1.key, src3.value +FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key); + +-- left-outer join (auto_join4.q) +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + ) a + LEFT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c; + +-- right-outer join (auto_join5.q) +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + ) a + RIGHT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c; + +-- full-outer join (auto_join6.q) +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + ) a + FULL OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c; + +-- full-outer join + left-outer join (auto_join7.q) +SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 10 and src1.key < 150 + ) a + FULL OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE 
src2.key > 150 and src2.key < 300 + ) b + ON (a.c1 = b.c3) + LEFT OUTER JOIN + ( + SELECT src3.key AS c5, src3.value AS c6 FROM src src3 WHERE src3.key > 200 and src3.key < 400 + ) c + ON (a.c1 = c.c5) +) c; + +-- left-outer join (auto_join8.q) +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + ) a + LEFT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c +where c.c3 IS NULL AND c.c1 IS NOT NULL; + + + diff --git a/sql/core/src/test/resources/sql-tests/results/auto_join0.sql.out b/sql/core/src/test/resources/sql-tests/results/auto_join0.sql.out deleted file mode 100644 index 8a15a480d6fc2..0000000000000 --- a/sql/core/src/test/resources/sql-tests/results/auto_join0.sql.out +++ /dev/null @@ -1,18 +0,0 @@ --- Automatically generated by SQLQueryTestSuite --- Number of queries: 1 - - --- !query 0 -select sum(hash(a.k1,a.v1,a.k2, a.v2)) -from ( -SELECT src1.key as k1, src1.value as v1, - src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 10) src1 - JOIN - (SELECT * FROM src WHERE src.key < 10) src2 - SORT BY k1, v1, k2, v2 -) a --- !query 0 schema -struct --- !query 0 output -42294652308 diff --git a/sql/core/src/test/resources/sql-tests/results/join.sql.out b/sql/core/src/test/resources/sql-tests/results/join.sql.out new file mode 100644 index 0000000000000..7476028408f22 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/join.sql.out @@ -0,0 +1,189 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 9 + + +-- !query 0 +SELECT a.k1, a.v1, a.k2, a.v2 +FROM ( +SELECT src1.key as k1, src1.value as v1, + src2.key as k2, src2.value as v2 FROM + (SELECT * FROM src WHERE src.key < 200) src1 + JOIN + (SELECT * FROM src WHERE src.key < 200) src2 + SORT BY k1, v1, k2, v2 +) a +-- 
!query 0 schema +struct<k1:int,v1:string,k2:int,v2:string> +-- !query 0 output +165 val_165 165 val_165 +165 val_165 165 val_165 +165 val_165 165 val_165 +165 val_165 165 val_165 +165 val_165 86 val_86 +165 val_165 86 val_86 +86 val_86 165 val_165 +86 val_86 165 val_165 +86 val_86 86 val_86 + + +-- !query 1 +SELECT src1.key, src2.value +FROM src src1 JOIN src src2 ON (src1.key = src2.key) +-- !query 1 schema +struct<key:int,value:string> +-- !query 1 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 + + +-- !query 2 +SELECT src1.key, src3.value +FROM src src1 JOIN src src2 ON (src1.key = src2.key) + JOIN src src3 ON (src1.key + src2.key = src3.key) +-- !query 2 schema +struct<key:int,value:string> +-- !query 2 output +165 val_330 +165 val_330 +165 val_330 +165 val_330 + + +-- !query 3 +SELECT src1.key, src3.value +FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) +-- !query 3 schema +struct<key:int,value:string> +-- !query 3 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 + + +-- !query 4 +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + ) a + LEFT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c +-- !query 4 schema +struct<c1:int,c2:string,c3:int,c4:string> +-- !query 4 output +165 val_165 NULL NULL +165 val_165 NULL NULL +251 val_251 251 val_251 + + +-- !query 5 +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + ) a + RIGHT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c +-- !query 5 schema +struct<c1:int,c2:string,c3:int,c4:string> +-- !query 5 output +251
val_251 251 val_251 +NULL NULL 330 val_330 + + +-- !query 6 +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + ) a + FULL OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c +-- !query 6 schema +struct<c1:int,c2:string,c3:int,c4:string> +-- !query 6 output +165 val_165 NULL NULL +165 val_165 NULL NULL +251 val_251 251 val_251 +NULL NULL 330 val_330 + + +-- !query 7 +SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 10 and src1.key < 150 + ) a + FULL OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 150 and src2.key < 300 + ) b + ON (a.c1 = b.c3) + LEFT OUTER JOIN + ( + SELECT src3.key AS c5, src3.value AS c6 FROM src src3 WHERE src3.key > 200 and src3.key < 400 + ) c + ON (a.c1 = c.c5) +) c +-- !query 7 schema +struct<c1:int,c2:string,c3:int,c4:string,c5:int,c6:string> +-- !query 7 output +86 val_86 NULL NULL NULL NULL +NULL NULL 165 val_165 NULL NULL +NULL NULL 165 val_165 NULL NULL +NULL NULL 251 val_251 NULL NULL + + +-- !query 8 +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + ) a + LEFT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c +where c.c3 IS NULL AND c.c1 IS NOT NULL +-- !query 8 schema +struct<c1:int,c2:string,c3:int,c4:string> +-- !query 8 output +165 val_165 NULL NULL +165 val_165 NULL NULL diff --git a/sql/core/src/test/resources/test-data/kv1.json b/sql/core/src/test/resources/test-data/kv1.json index 5981db6e17b6f..7bd430e36dba1 100644 --- a/sql/core/src/test/resources/test-data/kv1.json
+++ b/sql/core/src/test/resources/test-data/kv1.json @@ -1,500 +1,5 @@ -{"key":238,"value":"val_238"} +{"key":251,"value":"val_251"} {"key":86,"value":"val_86"} -{"key":311,"value":"val_311"} -{"key":27,"value":"val_27"} {"key":165,"value":"val_165"} -{"key":409,"value":"val_409"} -{"key":255,"value":"val_255"} -{"key":278,"value":"val_278"} -{"key":98,"value":"val_98"} -{"key":484,"value":"val_484"} -{"key":265,"value":"val_265"} -{"key":193,"value":"val_193"} -{"key":401,"value":"val_401"} -{"key":150,"value":"val_150"} -{"key":273,"value":"val_273"} -{"key":224,"value":"val_224"} -{"key":369,"value":"val_369"} -{"key":66,"value":"val_66"} -{"key":128,"value":"val_128"} -{"key":213,"value":"val_213"} -{"key":146,"value":"val_146"} -{"key":406,"value":"val_406"} -{"key":429,"value":"val_429"} -{"key":374,"value":"val_374"} -{"key":152,"value":"val_152"} -{"key":469,"value":"val_469"} -{"key":145,"value":"val_145"} -{"key":495,"value":"val_495"} -{"key":37,"value":"val_37"} -{"key":327,"value":"val_327"} -{"key":281,"value":"val_281"} -{"key":277,"value":"val_277"} -{"key":209,"value":"val_209"} -{"key":15,"value":"val_15"} -{"key":82,"value":"val_82"} -{"key":403,"value":"val_403"} -{"key":166,"value":"val_166"} -{"key":417,"value":"val_417"} -{"key":430,"value":"val_430"} -{"key":252,"value":"val_252"} -{"key":292,"value":"val_292"} -{"key":219,"value":"val_219"} -{"key":287,"value":"val_287"} -{"key":153,"value":"val_153"} -{"key":193,"value":"val_193"} -{"key":338,"value":"val_338"} -{"key":446,"value":"val_446"} -{"key":459,"value":"val_459"} -{"key":394,"value":"val_394"} -{"key":237,"value":"val_237"} -{"key":482,"value":"val_482"} -{"key":174,"value":"val_174"} -{"key":413,"value":"val_413"} -{"key":494,"value":"val_494"} -{"key":207,"value":"val_207"} -{"key":199,"value":"val_199"} -{"key":466,"value":"val_466"} -{"key":208,"value":"val_208"} -{"key":174,"value":"val_174"} -{"key":399,"value":"val_399"} -{"key":396,"value":"val_396"} 
-{"key":247,"value":"val_247"} -{"key":417,"value":"val_417"} -{"key":489,"value":"val_489"} -{"key":162,"value":"val_162"} -{"key":377,"value":"val_377"} -{"key":397,"value":"val_397"} -{"key":309,"value":"val_309"} -{"key":365,"value":"val_365"} -{"key":266,"value":"val_266"} -{"key":439,"value":"val_439"} -{"key":342,"value":"val_342"} -{"key":367,"value":"val_367"} -{"key":325,"value":"val_325"} -{"key":167,"value":"val_167"} -{"key":195,"value":"val_195"} -{"key":475,"value":"val_475"} -{"key":17,"value":"val_17"} -{"key":113,"value":"val_113"} -{"key":155,"value":"val_155"} -{"key":203,"value":"val_203"} -{"key":339,"value":"val_339"} -{"key":0,"value":"val_0"} -{"key":455,"value":"val_455"} -{"key":128,"value":"val_128"} -{"key":311,"value":"val_311"} -{"key":316,"value":"val_316"} -{"key":57,"value":"val_57"} -{"key":302,"value":"val_302"} -{"key":205,"value":"val_205"} -{"key":149,"value":"val_149"} -{"key":438,"value":"val_438"} -{"key":345,"value":"val_345"} -{"key":129,"value":"val_129"} -{"key":170,"value":"val_170"} -{"key":20,"value":"val_20"} -{"key":489,"value":"val_489"} -{"key":157,"value":"val_157"} -{"key":378,"value":"val_378"} -{"key":221,"value":"val_221"} -{"key":92,"value":"val_92"} -{"key":111,"value":"val_111"} -{"key":47,"value":"val_47"} -{"key":72,"value":"val_72"} -{"key":4,"value":"val_4"} -{"key":280,"value":"val_280"} -{"key":35,"value":"val_35"} -{"key":427,"value":"val_427"} -{"key":277,"value":"val_277"} -{"key":208,"value":"val_208"} -{"key":356,"value":"val_356"} -{"key":399,"value":"val_399"} -{"key":169,"value":"val_169"} -{"key":382,"value":"val_382"} -{"key":498,"value":"val_498"} -{"key":125,"value":"val_125"} -{"key":386,"value":"val_386"} -{"key":437,"value":"val_437"} -{"key":469,"value":"val_469"} -{"key":192,"value":"val_192"} -{"key":286,"value":"val_286"} -{"key":187,"value":"val_187"} -{"key":176,"value":"val_176"} -{"key":54,"value":"val_54"} -{"key":459,"value":"val_459"} -{"key":51,"value":"val_51"} 
-{"key":138,"value":"val_138"} -{"key":103,"value":"val_103"} -{"key":239,"value":"val_239"} -{"key":213,"value":"val_213"} -{"key":216,"value":"val_216"} -{"key":430,"value":"val_430"} -{"key":278,"value":"val_278"} -{"key":176,"value":"val_176"} -{"key":289,"value":"val_289"} -{"key":221,"value":"val_221"} -{"key":65,"value":"val_65"} -{"key":318,"value":"val_318"} -{"key":332,"value":"val_332"} -{"key":311,"value":"val_311"} -{"key":275,"value":"val_275"} -{"key":137,"value":"val_137"} -{"key":241,"value":"val_241"} -{"key":83,"value":"val_83"} -{"key":333,"value":"val_333"} -{"key":180,"value":"val_180"} -{"key":284,"value":"val_284"} -{"key":12,"value":"val_12"} -{"key":230,"value":"val_230"} -{"key":181,"value":"val_181"} -{"key":67,"value":"val_67"} -{"key":260,"value":"val_260"} -{"key":404,"value":"val_404"} -{"key":384,"value":"val_384"} -{"key":489,"value":"val_489"} -{"key":353,"value":"val_353"} -{"key":373,"value":"val_373"} -{"key":272,"value":"val_272"} -{"key":138,"value":"val_138"} -{"key":217,"value":"val_217"} -{"key":84,"value":"val_84"} -{"key":348,"value":"val_348"} -{"key":466,"value":"val_466"} -{"key":58,"value":"val_58"} -{"key":8,"value":"val_8"} -{"key":411,"value":"val_411"} -{"key":230,"value":"val_230"} -{"key":208,"value":"val_208"} -{"key":348,"value":"val_348"} -{"key":24,"value":"val_24"} -{"key":463,"value":"val_463"} -{"key":431,"value":"val_431"} -{"key":179,"value":"val_179"} -{"key":172,"value":"val_172"} -{"key":42,"value":"val_42"} -{"key":129,"value":"val_129"} -{"key":158,"value":"val_158"} -{"key":119,"value":"val_119"} -{"key":496,"value":"val_496"} -{"key":0,"value":"val_0"} -{"key":322,"value":"val_322"} -{"key":197,"value":"val_197"} -{"key":468,"value":"val_468"} -{"key":393,"value":"val_393"} -{"key":454,"value":"val_454"} -{"key":100,"value":"val_100"} -{"key":298,"value":"val_298"} -{"key":199,"value":"val_199"} -{"key":191,"value":"val_191"} -{"key":418,"value":"val_418"} -{"key":96,"value":"val_96"} 
-{"key":26,"value":"val_26"} +{"key":330,"value":"val_330"} {"key":165,"value":"val_165"} -{"key":327,"value":"val_327"} -{"key":230,"value":"val_230"} -{"key":205,"value":"val_205"} -{"key":120,"value":"val_120"} -{"key":131,"value":"val_131"} -{"key":51,"value":"val_51"} -{"key":404,"value":"val_404"} -{"key":43,"value":"val_43"} -{"key":436,"value":"val_436"} -{"key":156,"value":"val_156"} -{"key":469,"value":"val_469"} -{"key":468,"value":"val_468"} -{"key":308,"value":"val_308"} -{"key":95,"value":"val_95"} -{"key":196,"value":"val_196"} -{"key":288,"value":"val_288"} -{"key":481,"value":"val_481"} -{"key":457,"value":"val_457"} -{"key":98,"value":"val_98"} -{"key":282,"value":"val_282"} -{"key":197,"value":"val_197"} -{"key":187,"value":"val_187"} -{"key":318,"value":"val_318"} -{"key":318,"value":"val_318"} -{"key":409,"value":"val_409"} -{"key":470,"value":"val_470"} -{"key":137,"value":"val_137"} -{"key":369,"value":"val_369"} -{"key":316,"value":"val_316"} -{"key":169,"value":"val_169"} -{"key":413,"value":"val_413"} -{"key":85,"value":"val_85"} -{"key":77,"value":"val_77"} -{"key":0,"value":"val_0"} -{"key":490,"value":"val_490"} -{"key":87,"value":"val_87"} -{"key":364,"value":"val_364"} -{"key":179,"value":"val_179"} -{"key":118,"value":"val_118"} -{"key":134,"value":"val_134"} -{"key":395,"value":"val_395"} -{"key":282,"value":"val_282"} -{"key":138,"value":"val_138"} -{"key":238,"value":"val_238"} -{"key":419,"value":"val_419"} -{"key":15,"value":"val_15"} -{"key":118,"value":"val_118"} -{"key":72,"value":"val_72"} -{"key":90,"value":"val_90"} -{"key":307,"value":"val_307"} -{"key":19,"value":"val_19"} -{"key":435,"value":"val_435"} -{"key":10,"value":"val_10"} -{"key":277,"value":"val_277"} -{"key":273,"value":"val_273"} -{"key":306,"value":"val_306"} -{"key":224,"value":"val_224"} -{"key":309,"value":"val_309"} -{"key":389,"value":"val_389"} -{"key":327,"value":"val_327"} -{"key":242,"value":"val_242"} -{"key":369,"value":"val_369"} 
-{"key":392,"value":"val_392"} -{"key":272,"value":"val_272"} -{"key":331,"value":"val_331"} -{"key":401,"value":"val_401"} -{"key":242,"value":"val_242"} -{"key":452,"value":"val_452"} -{"key":177,"value":"val_177"} -{"key":226,"value":"val_226"} -{"key":5,"value":"val_5"} -{"key":497,"value":"val_497"} -{"key":402,"value":"val_402"} -{"key":396,"value":"val_396"} -{"key":317,"value":"val_317"} -{"key":395,"value":"val_395"} -{"key":58,"value":"val_58"} -{"key":35,"value":"val_35"} -{"key":336,"value":"val_336"} -{"key":95,"value":"val_95"} -{"key":11,"value":"val_11"} -{"key":168,"value":"val_168"} -{"key":34,"value":"val_34"} -{"key":229,"value":"val_229"} -{"key":233,"value":"val_233"} -{"key":143,"value":"val_143"} -{"key":472,"value":"val_472"} -{"key":322,"value":"val_322"} -{"key":498,"value":"val_498"} -{"key":160,"value":"val_160"} -{"key":195,"value":"val_195"} -{"key":42,"value":"val_42"} -{"key":321,"value":"val_321"} -{"key":430,"value":"val_430"} -{"key":119,"value":"val_119"} -{"key":489,"value":"val_489"} -{"key":458,"value":"val_458"} -{"key":78,"value":"val_78"} -{"key":76,"value":"val_76"} -{"key":41,"value":"val_41"} -{"key":223,"value":"val_223"} -{"key":492,"value":"val_492"} -{"key":149,"value":"val_149"} -{"key":449,"value":"val_449"} -{"key":218,"value":"val_218"} -{"key":228,"value":"val_228"} -{"key":138,"value":"val_138"} -{"key":453,"value":"val_453"} -{"key":30,"value":"val_30"} -{"key":209,"value":"val_209"} -{"key":64,"value":"val_64"} -{"key":468,"value":"val_468"} -{"key":76,"value":"val_76"} -{"key":74,"value":"val_74"} -{"key":342,"value":"val_342"} -{"key":69,"value":"val_69"} -{"key":230,"value":"val_230"} -{"key":33,"value":"val_33"} -{"key":368,"value":"val_368"} -{"key":103,"value":"val_103"} -{"key":296,"value":"val_296"} -{"key":113,"value":"val_113"} -{"key":216,"value":"val_216"} -{"key":367,"value":"val_367"} -{"key":344,"value":"val_344"} -{"key":167,"value":"val_167"} -{"key":274,"value":"val_274"} 
-{"key":219,"value":"val_219"} -{"key":239,"value":"val_239"} -{"key":485,"value":"val_485"} -{"key":116,"value":"val_116"} -{"key":223,"value":"val_223"} -{"key":256,"value":"val_256"} -{"key":263,"value":"val_263"} -{"key":70,"value":"val_70"} -{"key":487,"value":"val_487"} -{"key":480,"value":"val_480"} -{"key":401,"value":"val_401"} -{"key":288,"value":"val_288"} -{"key":191,"value":"val_191"} -{"key":5,"value":"val_5"} -{"key":244,"value":"val_244"} -{"key":438,"value":"val_438"} -{"key":128,"value":"val_128"} -{"key":467,"value":"val_467"} -{"key":432,"value":"val_432"} -{"key":202,"value":"val_202"} -{"key":316,"value":"val_316"} -{"key":229,"value":"val_229"} -{"key":469,"value":"val_469"} -{"key":463,"value":"val_463"} -{"key":280,"value":"val_280"} -{"key":2,"value":"val_2"} -{"key":35,"value":"val_35"} -{"key":283,"value":"val_283"} -{"key":331,"value":"val_331"} -{"key":235,"value":"val_235"} -{"key":80,"value":"val_80"} -{"key":44,"value":"val_44"} -{"key":193,"value":"val_193"} -{"key":321,"value":"val_321"} -{"key":335,"value":"val_335"} -{"key":104,"value":"val_104"} -{"key":466,"value":"val_466"} -{"key":366,"value":"val_366"} -{"key":175,"value":"val_175"} -{"key":403,"value":"val_403"} -{"key":483,"value":"val_483"} -{"key":53,"value":"val_53"} -{"key":105,"value":"val_105"} -{"key":257,"value":"val_257"} -{"key":406,"value":"val_406"} -{"key":409,"value":"val_409"} -{"key":190,"value":"val_190"} -{"key":406,"value":"val_406"} -{"key":401,"value":"val_401"} -{"key":114,"value":"val_114"} -{"key":258,"value":"val_258"} -{"key":90,"value":"val_90"} -{"key":203,"value":"val_203"} -{"key":262,"value":"val_262"} -{"key":348,"value":"val_348"} -{"key":424,"value":"val_424"} -{"key":12,"value":"val_12"} -{"key":396,"value":"val_396"} -{"key":201,"value":"val_201"} -{"key":217,"value":"val_217"} -{"key":164,"value":"val_164"} -{"key":431,"value":"val_431"} -{"key":454,"value":"val_454"} -{"key":478,"value":"val_478"} -{"key":298,"value":"val_298"} 
-{"key":125,"value":"val_125"} -{"key":431,"value":"val_431"} -{"key":164,"value":"val_164"} -{"key":424,"value":"val_424"} -{"key":187,"value":"val_187"} -{"key":382,"value":"val_382"} -{"key":5,"value":"val_5"} -{"key":70,"value":"val_70"} -{"key":397,"value":"val_397"} -{"key":480,"value":"val_480"} -{"key":291,"value":"val_291"} -{"key":24,"value":"val_24"} -{"key":351,"value":"val_351"} -{"key":255,"value":"val_255"} -{"key":104,"value":"val_104"} -{"key":70,"value":"val_70"} -{"key":163,"value":"val_163"} -{"key":438,"value":"val_438"} -{"key":119,"value":"val_119"} -{"key":414,"value":"val_414"} -{"key":200,"value":"val_200"} -{"key":491,"value":"val_491"} -{"key":237,"value":"val_237"} -{"key":439,"value":"val_439"} -{"key":360,"value":"val_360"} -{"key":248,"value":"val_248"} -{"key":479,"value":"val_479"} -{"key":305,"value":"val_305"} -{"key":417,"value":"val_417"} -{"key":199,"value":"val_199"} -{"key":444,"value":"val_444"} -{"key":120,"value":"val_120"} -{"key":429,"value":"val_429"} -{"key":169,"value":"val_169"} -{"key":443,"value":"val_443"} -{"key":323,"value":"val_323"} -{"key":325,"value":"val_325"} -{"key":277,"value":"val_277"} -{"key":230,"value":"val_230"} -{"key":478,"value":"val_478"} -{"key":178,"value":"val_178"} -{"key":468,"value":"val_468"} -{"key":310,"value":"val_310"} -{"key":317,"value":"val_317"} -{"key":333,"value":"val_333"} -{"key":493,"value":"val_493"} -{"key":460,"value":"val_460"} -{"key":207,"value":"val_207"} -{"key":249,"value":"val_249"} -{"key":265,"value":"val_265"} -{"key":480,"value":"val_480"} -{"key":83,"value":"val_83"} -{"key":136,"value":"val_136"} -{"key":353,"value":"val_353"} -{"key":172,"value":"val_172"} -{"key":214,"value":"val_214"} -{"key":462,"value":"val_462"} -{"key":233,"value":"val_233"} -{"key":406,"value":"val_406"} -{"key":133,"value":"val_133"} -{"key":175,"value":"val_175"} -{"key":189,"value":"val_189"} -{"key":454,"value":"val_454"} -{"key":375,"value":"val_375"} 
-{"key":401,"value":"val_401"} -{"key":421,"value":"val_421"} -{"key":407,"value":"val_407"} -{"key":384,"value":"val_384"} -{"key":256,"value":"val_256"} -{"key":26,"value":"val_26"} -{"key":134,"value":"val_134"} -{"key":67,"value":"val_67"} -{"key":384,"value":"val_384"} -{"key":379,"value":"val_379"} -{"key":18,"value":"val_18"} -{"key":462,"value":"val_462"} -{"key":492,"value":"val_492"} -{"key":100,"value":"val_100"} -{"key":298,"value":"val_298"} -{"key":9,"value":"val_9"} -{"key":341,"value":"val_341"} -{"key":498,"value":"val_498"} -{"key":146,"value":"val_146"} -{"key":458,"value":"val_458"} -{"key":362,"value":"val_362"} -{"key":186,"value":"val_186"} -{"key":285,"value":"val_285"} -{"key":348,"value":"val_348"} -{"key":167,"value":"val_167"} -{"key":18,"value":"val_18"} -{"key":273,"value":"val_273"} -{"key":183,"value":"val_183"} -{"key":281,"value":"val_281"} -{"key":344,"value":"val_344"} -{"key":97,"value":"val_97"} -{"key":469,"value":"val_469"} -{"key":315,"value":"val_315"} -{"key":84,"value":"val_84"} -{"key":28,"value":"val_28"} -{"key":37,"value":"val_37"} -{"key":448,"value":"val_448"} -{"key":152,"value":"val_152"} -{"key":348,"value":"val_348"} -{"key":307,"value":"val_307"} -{"key":194,"value":"val_194"} -{"key":414,"value":"val_414"} -{"key":477,"value":"val_477"} -{"key":222,"value":"val_222"} -{"key":126,"value":"val_126"} -{"key":90,"value":"val_90"} -{"key":169,"value":"val_169"} -{"key":403,"value":"val_403"} -{"key":400,"value":"val_400"} -{"key":200,"value":"val_200"} -{"key":97,"value":"val_97"} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index ff0280ff35b62..aac61211964c6 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -256,6 +256,12 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { 
.createOrReplaceTempView("src") } + test("test") { + val srcSchema = new StructType().add("key", IntegerType).add("value", StringType) + spark.read.schema(srcSchema).json(getFilePath("test-data/kv1.json")) + .createOrReplaceTempView("src") + } + private val originalTimeZone = TimeZone.getDefault private val originalLocale = Locale.getDefault From aaeb298e989c50926a1480508d27acace7118cf8 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sat, 13 Aug 2016 00:05:31 -0700 Subject: [PATCH 04/27] batch 1 --- .../test/resources/sql-tests/inputs/join.sql | 6 ++++-- .../resources/sql-tests/results/join.sql.out | 18 +++++++++++++++++- .../apache/spark/sql/SQLQueryTestSuite.scala | 5 +++++ 3 files changed, 26 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/join.sql b/sql/core/src/test/resources/sql-tests/inputs/join.sql index d6ab586c87fa6..bcacfbb650c38 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/join.sql @@ -103,5 +103,7 @@ FROM ( ) c where c.c3 IS NULL AND c.c1 IS NOT NULL; - - +-- join (auto_join9.q) +SELECT src1.key, src2.value +FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +WHERE src1.ds = '2008-04-08' and src1.hr = '12' diff --git a/sql/core/src/test/resources/sql-tests/results/join.sql.out b/sql/core/src/test/resources/sql-tests/results/join.sql.out index 7476028408f22..aa30bf573c4a4 100644 --- a/sql/core/src/test/resources/sql-tests/results/join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/join.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 9 +-- Number of queries: 10 -- !query 0 @@ -187,3 +187,19 @@ struct -- !query 8 output 165 val_165 NULL NULL 165 val_165 NULL NULL + + +-- !query 9 +SELECT src1.key, src2.value +FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +WHERE src1.ds = '2008-04-08' and src1.hr = '12' +-- !query 9 schema +struct +-- !query 9 output +165 val_165 +165 
val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index aac61211964c6..55615b800cb40 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -254,6 +254,11 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { val srcSchema = new StructType().add("key", IntegerType).add("value", StringType) session.read.schema(srcSchema).json(getFilePath("test-data/kv1.json")) .createOrReplaceTempView("src") + + val srcpartSchema = new StructType().add("key", IntegerType).add("value", StringType) + .add("ds", StringType).add("hr", StringType) + session.read.schema(srcpartSchema).json(getFilePath("test-data/srcpart.json")) + .createOrReplaceTempView("srcpart") } test("test") { From 5f223160800da89166153c271f0e00e9488da135 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sat, 13 Aug 2016 00:06:08 -0700 Subject: [PATCH 05/27] revert --- .../src/test/resources/test-data/srcpart.json | 20 +++++++++++++++++++ .../apache/spark/sql/SQLQueryTestSuite.scala | 6 ------ 2 files changed, 20 insertions(+), 6 deletions(-) create mode 100644 sql/core/src/test/resources/test-data/srcpart.json diff --git a/sql/core/src/test/resources/test-data/srcpart.json b/sql/core/src/test/resources/test-data/srcpart.json new file mode 100644 index 0000000000000..aea3b4cfd7e3e --- /dev/null +++ b/sql/core/src/test/resources/test-data/srcpart.json @@ -0,0 +1,20 @@ +{"key":251,"value":"val_251","ds":"2008-04-08","hr":"11"} +{"key":251,"value":"val_251","ds":"2008-04-09","hr":"11"} +{"key":251,"value":"val_251","ds":"2008-04-08","hr":"12"} +{"key":251,"value":"val_251","ds":"2008-04-09","hr":"12"} +{"key":86,"value":"val_86","ds":"2008-04-08","hr":"11"} +{"key":86,"value":"val_86","ds":"2008-04-09","hr":"11"} 
+{"key":86,"value":"val_86","ds":"2008-04-08","hr":"12"} +{"key":86,"value":"val_86","ds":"2008-04-09","hr":"12"} +{"key":165,"value":"val_165","ds":"2008-04-08","hr":"11"} +{"key":165,"value":"val_165","ds":"2008-04-09","hr":"11"} +{"key":165,"value":"val_165","ds":"2008-04-08","hr":"12"} +{"key":165,"value":"val_165","ds":"2008-04-09","hr":"12"} +{"key":330,"value":"val_330","ds":"2008-04-08","hr":"11"} +{"key":330,"value":"val_330","ds":"2008-04-09","hr":"11"} +{"key":330,"value":"val_330","ds":"2008-04-08","hr":"12"} +{"key":330,"value":"val_330","ds":"2008-04-09","hr":"12"} +{"key":165,"value":"val_165","ds":"2008-04-08","hr":"11"} +{"key":165,"value":"val_165","ds":"2008-04-09","hr":"11"} +{"key":165,"value":"val_165","ds":"2008-04-08","hr":"12"} +{"key":165,"value":"val_165","ds":"2008-04-09","hr":"12"} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 55615b800cb40..4dc8d61ce79b5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -261,12 +261,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { .createOrReplaceTempView("srcpart") } - test("test") { - val srcSchema = new StructType().add("key", IntegerType).add("value", StringType) - spark.read.schema(srcSchema).json(getFilePath("test-data/kv1.json")) - .createOrReplaceTempView("src") - } - private val originalTimeZone = TimeZone.getDefault private val originalLocale = Locale.getDefault From 3fe55f184e5e8771c88b826f9bcccb76d9817624 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sat, 13 Aug 2016 09:53:02 -0700 Subject: [PATCH 06/27] batch 2 --- .../test/resources/sql-tests/inputs/join.sql | 337 +++++++- .../resources/sql-tests/results/join.sql.out | 728 +++++++++++++++++- .../src/test/resources/test-data/kv3.json | 7 + .../apache/spark/sql/SQLQueryTestSuite.scala | 4 + 4 files 
changed, 1034 insertions(+), 42 deletions(-) create mode 100644 sql/core/src/test/resources/test-data/kv3.json diff --git a/sql/core/src/test/resources/sql-tests/inputs/join.sql b/sql/core/src/test/resources/sql-tests/inputs/join.sql index bcacfbb650c38..72f7d06295557 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/join.sql @@ -25,31 +25,31 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key -- left-outer join (auto_join4.q) SELECT c.c1, c.c2, c.c3, c.c4 FROM ( - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 - FROM - ( - SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 - ) a - LEFT OUTER JOIN - ( - SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 - ) b - ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + ) a + LEFT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) ) c; -- right-outer join (auto_join5.q) SELECT c.c1, c.c2, c.c3, c.c4 FROM ( - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 - FROM - ( - SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 - ) a - RIGHT OUTER JOIN - ( - SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 - ) b - ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + ) a + RIGHT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) ) c; -- full-outer join (auto_join6.q) @@ -106,4 +106,299 @@ where c.c3 IS NULL AND c.c1 IS NOT NULL; -- 
join (auto_join9.q) SELECT src1.key, src2.value FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) -WHERE src1.ds = '2008-04-08' and src1.hr = '12' +WHERE src1.ds = '2008-04-08' and src1.hr = '12'; + +-- self-join (auto_join10.q) +FROM +(SELECT src.* FROM src) x +JOIN +(SELECT src.* FROM src) Y +ON (x.key = Y.key) +select Y.key, Y.value; + +-- self-join (auto_join11.q) +SELECT src1.c1, src2.c4 +FROM + (SELECT src.key as c1, src.value as c2 from src) src1 + JOIN + (SELECT src.key as c3, src.value as c4 from src) src2 + ON src1.c1 = src2.c3 AND src1.c1 < 200; + +-- join (auto_join12.q) +SELECT src1.c1, src2.c4 +FROM + (SELECT src.key as c1, src.value as c2 from src) src1 + JOIN + (SELECT src.key as c3, src.value as c4 from src) src2 + ON src1.c1 = src2.c3 AND src1.c1 < 200 + JOIN + (SELECT src.key as c5, src.value as c6 from src) src3 + ON src1.c1 = src3.c5 AND src3.c5 < 100; + +-- join (auto_join13.q) +SELECT src1.c1, src2.c4 +FROM + (SELECT src.key as c1, src.value as c2 from src) src1 + JOIN + (SELECT src.key as c3, src.value as c4 from src) src2 + ON src1.c1 = src2.c3 AND src1.c1 < 250 + JOIN + (SELECT src.key as c5, src.value as c6 from src) src3 + ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 400; + +-- join (auto_join14.q) +FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 200 +SELECT src.key, srcpart.value; + +-- join (auto_join15.q) +SELECT a.k1, a.v1, a.k2, a.v2 + FROM ( + SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 + FROM src src1 JOIN src src2 ON (src1.key = src2.key) + SORT BY k1, v1, k2, v2 + ) a; + +-- join (auto_join16.q) +SELECT subq.key, tab.value +FROM +(select a.key, a.value from src a where a.key > 100 ) subq +JOIN src tab +ON (subq.key = tab.key and subq.key > 150 and subq.value = tab.value) +where tab.key < 200; + +-- join (auto_join17.q) +SELECT src1.*, src2.* +FROM src src1 JOIN src src2 ON (src1.key = src2.key); + +-- join (auto_join18.q) +SELECT a.key, a.value, 
b.key, b.value +FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value + FROM src1 src2 group by src2.key + ) b +ON (a.key = b.key); + +-- join (auto_join18_multi_distinct.q) +SELECT a.key, a.value, b.key, b.value1, b.value2 +FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value1, + count(distinct(src2.key)) AS value2 + FROM src1 src2 group by src2.key + ) b +ON (a.key = b.key); + +-- join (auto_join19.q) +SELECT src1.key, src2.value +FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11'); + +-- join (auto_join20.q) +SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 +FROM ( + SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 + FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 200) + RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 300) + SORT BY k1,v1,k2,v2,k3,v3 +)a; + +-- join (auto_join20.q) +SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 +FROM ( + SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 + FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key < 100) + RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 300) + SORT BY k1,v1,k2,v2,k3,v3 +)a; + +-- join (auto_join21.q) +SELECT * +FROM + src src1 + LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) +SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- join (auto_join22.q) +SELECT src5.src1_value +FROM + (SELECT src3.*, src4.value as src4_value, 
src4.key as src4_key + FROM src src4 + JOIN (SELECT src2.*, src1.key as src1_key, src1.value as src1_value + FROM src src1 + JOIN src src2 ON src1.key = src2.key) src3 + ON src3.src1_key = src4.key) src5; + +-- join (auto_join23.q) +SELECT * FROM src src1 JOIN src src2 +WHERE src1.key < 200 and src2.key < 200 +SORT BY src1.key, src1.value, src2.key, src2.value; + +-- join (auto_join24.q) +WITH tst1 AS (SELECT a.key, count(1) as cnt FROM src a group by a.key) +SELECT sum(a.cnt) FROM tst1 a JOIN tst1 b ON a.key = b.key; + +-- join (auto_join26.q) +SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key order by x.key; + +-- join (auto_join27.q) +SELECT count(1) +FROM +( + SELECT src.key, src.value from src + UNION ALL + SELECT DISTINCT src.key, src.value from src +) src_12 +JOIN +( + SELECT src.key as k, src.value as v from src +) src3 +ON src_12.key = src3.k AND src3.k < 300; + +-- join (auto_join28.q) +SELECT * FROM src src1 + LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- join (auto_join28.q) +SELECT * FROM src src1 + RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- join (auto_join28.q) +SELECT * FROM src src1 + LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- join (auto_join28.q) +SELECT * FROM src src1 + RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT 
BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- join (auto_join29.q) +SELECT * FROM src src1 + JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- join (auto_join29.q) +SELECT * FROM src src1 + JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- join (auto_join29.q) +SELECT * FROM src src1 + LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- join (auto_join29.q) +SELECT * FROM src src1 + RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- join (auto_join30.q) +FROM +(SELECT src.* FROM src sort by key) x +JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +select Y.key,Y.value; + +-- join (auto_join30.q) +FROM +(SELECT src.* FROM src sort by key) x +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +select Y.key,Y.value; + +-- join (auto_join30.q) +FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +select Y.key,Y.value; + +-- join (auto_join30.q) +FROM +(SELECT src.* FROM src sort by key) x +JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value; + +-- join (auto_join30.q) +FROM +(SELECT src.* FROM src sort by key) x +JOIN 
+(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value; + +-- join (auto_join30.q) +FROM +(SELECT src.* FROM src sort by key) x +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value; + +-- join (auto_join30.q) +FROM +(SELECT src.* FROM src sort by key) x +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value; + +-- join (auto_join30.q) +FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value; + +-- join (auto_join31.q) +FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value; + diff --git a/sql/core/src/test/resources/sql-tests/results/join.sql.out b/sql/core/src/test/resources/sql-tests/results/join.sql.out index aa30bf573c4a4..f20f3e03b8ea1 100644 --- a/sql/core/src/test/resources/sql-tests/results/join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/join.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 10 +-- Number of queries: 46 -- !query 0 @@ -76,16 +76,16 @@ struct -- !query 4 SELECT c.c1, c.c2, c.c3, c.c4 FROM ( - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 - FROM - ( - SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 - ) a - LEFT OUTER JOIN - ( - SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 - ) b - ON (a.c1 = b.c3) + SELECT a.c1 AS 
c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + ) a + LEFT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) ) c -- !query 4 schema struct @@ -98,16 +98,16 @@ struct -- !query 5 SELECT c.c1, c.c2, c.c3, c.c4 FROM ( - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 - FROM - ( - SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 - ) a - RIGHT OUTER JOIN - ( - SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 - ) b - ON (a.c1 = b.c3) + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + ) a + RIGHT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) ) c -- !query 5 schema struct @@ -203,3 +203,689 @@ struct 251 val_251 330 val_330 86 val_86 + + +-- !query 10 +FROM +(SELECT src.* FROM src) x +JOIN +(SELECT src.* FROM src) Y +ON (x.key = Y.key) +select Y.key, Y.value +-- !query 10 schema +struct +-- !query 10 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 + + +-- !query 11 +SELECT src1.c1, src2.c4 +FROM + (SELECT src.key as c1, src.value as c2 from src) src1 + JOIN + (SELECT src.key as c3, src.value as c4 from src) src2 + ON src1.c1 = src2.c3 AND src1.c1 < 200 +-- !query 11 schema +struct +-- !query 11 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +86 val_86 + + +-- !query 12 +SELECT src1.c1, src2.c4 +FROM + (SELECT src.key as c1, src.value as c2 from src) src1 + JOIN + (SELECT src.key as c3, src.value as c4 from src) src2 + ON src1.c1 = src2.c3 AND src1.c1 < 200 + JOIN + (SELECT src.key as c5, src.value as c6 from src) src3 + ON src1.c1 = 
src3.c5 AND src3.c5 < 100 +-- !query 12 schema +struct +-- !query 12 output +86 val_86 + + +-- !query 13 +SELECT src1.c1, src2.c4 +FROM + (SELECT src.key as c1, src.value as c2 from src) src1 + JOIN + (SELECT src.key as c3, src.value as c4 from src) src2 + ON src1.c1 = src2.c3 AND src1.c1 < 250 + JOIN + (SELECT src.key as c5, src.value as c6 from src) src3 + ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 400 +-- !query 13 schema +struct +-- !query 13 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 + + +-- !query 14 +FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 200 +SELECT src.key, srcpart.value +-- !query 14 schema +struct +-- !query 14 output +251 val_251 +251 val_251 +330 val_330 +330 val_330 + + +-- !query 15 +SELECT a.k1, a.v1, a.k2, a.v2 + FROM ( + SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 + FROM src src1 JOIN src src2 ON (src1.key = src2.key) + SORT BY k1, v1, k2, v2 + ) a +-- !query 15 schema +struct +-- !query 15 output +165 val_165 165 val_165 +165 val_165 165 val_165 +165 val_165 165 val_165 +165 val_165 165 val_165 +251 val_251 251 val_251 +330 val_330 330 val_330 +86 val_86 86 val_86 + + +-- !query 16 +SELECT subq.key, tab.value +FROM +(select a.key, a.value from src a where a.key > 100 ) subq +JOIN src tab +ON (subq.key = tab.key and subq.key > 150 and subq.value = tab.value) +where tab.key < 200 +-- !query 16 schema +struct +-- !query 16 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 + + +-- !query 17 +SELECT src1.*, src2.* +FROM src src1 JOIN src src2 ON (src1.key = src2.key) +-- !query 17 schema +struct +-- !query 17 output +165 val_165 165 val_165 +165 val_165 165 val_165 +165 val_165 165 val_165 +165 val_165 165 val_165 +251 val_251 251 val_251 +330 val_330 330 val_330 +86 val_86 86 val_86 + + +-- !query 18 +SELECT a.key, a.value, b.key, b.value +FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + 
FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value + FROM src1 src2 group by src2.key + ) b +ON (a.key = b.key) +-- !query 18 schema +struct +-- !query 18 output +165 2 165 1 +251 1 NULL NULL +330 1 330 1 +86 1 86 1 +NULL NULL 201 0 +NULL NULL NULL 1 + + +-- !query 19 +SELECT a.key, a.value, b.key, b.value1, b.value2 +FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value1, + count(distinct(src2.key)) AS value2 + FROM src1 src2 group by src2.key + ) b +ON (a.key = b.key) +-- !query 19 schema +struct +-- !query 19 output +165 2 165 1 1 +251 1 NULL NULL NULL +330 1 330 1 1 +86 1 86 1 1 +NULL NULL 201 0 1 +NULL NULL NULL 1 0 + + +-- !query 20 +SELECT src1.key, src2.value +FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') +-- !query 20 schema +struct +-- !query 20 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +251 val_251 +251 val_251 +251 val_251 +330 val_330 +330 val_330 +330 val_330 +330 val_330 +86 val_86 +86 val_86 +86 val_86 +86 val_86 + + +-- !query 21 +SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 +FROM ( + SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 + FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 200) + RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 300) + SORT BY k1,v1,k2,v2,k3,v3 +)a +-- !query 21 schema +struct +-- !query 21 output +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +165 
val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +86 val_86 86 val_86 86 val_86 +NULL NULL NULL NULL 251 val_251 +NULL NULL NULL NULL 330 val_330 + + +-- !query 22 +SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 +FROM ( + SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 + FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key < 100) + RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 300) + SORT BY k1,v1,k2,v2,k3,v3 +)a +-- !query 22 schema +struct +-- !query 22 output +86 val_86 86 val_86 86 val_86 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 251 val_251 +NULL NULL NULL NULL 330 val_330 + + +-- !query 23 +SELECT * +FROM + src src1 + LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) +SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 23 schema +struct +-- !query 23 output +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 251 val_251 +NULL NULL NULL NULL 330 val_330 +NULL NULL NULL NULL 86 val_86 + + +-- !query 24 +SELECT src5.src1_value +FROM + (SELECT src3.*, src4.value as src4_value, src4.key as src4_key + FROM src src4 + JOIN (SELECT src2.*, src1.key as src1_key, src1.value as src1_value + FROM src src1 + JOIN src src2 ON src1.key = src2.key) src3 + ON src3.src1_key = src4.key) src5 +-- !query 24 schema +struct +-- !query 24 output +val_165 +val_165 +val_165 +val_165 +val_165 +val_165 +val_165 +val_165 +val_251 +val_330 +val_86 + + +-- !query 25 +SELECT * FROM src src1 JOIN src src2 +WHERE src1.key < 200 and src2.key < 200 +SORT BY src1.key, src1.value, src2.key, src2.value +-- !query 25 schema +struct +-- !query 25 output +165 val_165 165 val_165 +165 val_165 165 val_165 +165 val_165 165 val_165 
+165 val_165 165 val_165 +165 val_165 86 val_86 +165 val_165 86 val_86 +86 val_86 165 val_165 +86 val_86 165 val_165 +86 val_86 86 val_86 + + +-- !query 26 +WITH tst1 AS (SELECT a.key, count(1) as cnt FROM src a group by a.key) +SELECT sum(a.cnt) FROM tst1 a JOIN tst1 b ON a.key = b.key +-- !query 26 schema +struct +-- !query 26 output +5 + + +-- !query 27 +SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key order by x.key +-- !query 27 schema +struct +-- !query 27 output +86 1 +165 4 +330 1 + + +-- !query 28 +SELECT count(1) +FROM +( + SELECT src.key, src.value from src + UNION ALL + SELECT DISTINCT src.key, src.value from src +) src_12 +JOIN +( + SELECT src.key as k, src.value as v from src +) src3 +ON src_12.key = src3.k AND src3.k < 300 +-- !query 28 schema +struct +-- !query 28 output +10 + + +-- !query 29 +SELECT * FROM src src1 + LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 29 schema +struct +-- !query 29 output +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 251 val_251 +NULL NULL NULL NULL 330 val_330 +NULL NULL NULL NULL 86 val_86 + + +-- !query 30 +SELECT * FROM src src1 + RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 30 schema +struct +-- !query 30 output +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 86 val_86 86 val_86 +NULL NULL NULL NULL 251 val_251 +NULL NULL NULL NULL 330 val_330 + + +-- !query 31 +SELECT * FROM src src1 + LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 
200 AND src2.key > 200) + LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 31 schema +struct +-- !query 31 output +165 val_165 NULL NULL NULL NULL +165 val_165 NULL NULL NULL NULL +251 val_251 NULL NULL NULL NULL +330 val_330 NULL NULL NULL NULL +86 val_86 NULL NULL NULL NULL + + +-- !query 32 +SELECT * FROM src src1 + RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 32 schema +struct +-- !query 32 output +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 251 val_251 NULL NULL +NULL NULL 330 val_330 NULL NULL +NULL NULL 86 val_86 86 val_86 + + +-- !query 33 +SELECT * FROM src src1 + JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 33 schema +struct +-- !query 33 output + + + +-- !query 34 +SELECT * FROM src src1 + JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 34 schema +struct +-- !query 34 output +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 251 val_251 +NULL NULL NULL NULL 330 val_330 +NULL NULL NULL NULL 86 val_86 + + +-- !query 35 +SELECT * FROM src src1 + LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, 
src3.key, src3.value +-- !query 35 schema +struct +-- !query 35 output + + + +-- !query 36 +SELECT * FROM src src1 + RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 36 schema +struct +-- !query 36 output +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 86 val_86 86 val_86 + + +-- !query 37 +FROM +(SELECT src.* FROM src sort by key) x +JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +select Y.key,Y.value +-- !query 37 schema +struct +-- !query 37 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 + + +-- !query 38 +FROM +(SELECT src.* FROM src sort by key) x +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +select Y.key,Y.value +-- !query 38 schema +struct +-- !query 38 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 + + +-- !query 39 +FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +select Y.key,Y.value +-- !query 39 schema +struct +-- !query 39 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 + + +-- !query 40 +FROM +(SELECT src.* FROM src sort by key) x +JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value +-- !query 40 schema +struct +-- !query 40 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 + + +-- !query 41 +FROM +(SELECT src.* FROM src sort by key) x +JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) 
+LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value +-- !query 41 schema +struct +-- !query 41 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 + + +-- !query 42 +FROM +(SELECT src.* FROM src sort by key) x +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value +-- !query 42 schema +struct +-- !query 42 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 + + +-- !query 43 +FROM +(SELECT src.* FROM src sort by key) x +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value +-- !query 43 schema +struct +-- !query 43 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 + + +-- !query 44 +FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value +-- !query 44 schema +struct +-- !query 44 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 + + +-- !query 45 +FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value +-- !query 45 schema +struct +-- !query 45 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 
val_330 +86 val_86 diff --git a/sql/core/src/test/resources/test-data/kv3.json b/sql/core/src/test/resources/test-data/kv3.json new file mode 100644 index 0000000000000..9bb1555f7df02 --- /dev/null +++ b/sql/core/src/test/resources/test-data/kv3.json @@ -0,0 +1,7 @@ +{"key":201,"value":null} +{"key":86,"value":"val_86"} +{"key":null,"value":"val_null"} +{"key":165,"value":"val_165"} +{"key":null,"value":null} +{"key":330,"value":"val_330"} +{"key":165,"value":null} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 4dc8d61ce79b5..f7f3a6473e460 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -255,6 +255,10 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { session.read.schema(srcSchema).json(getFilePath("test-data/kv1.json")) .createOrReplaceTempView("src") + val src1Schema = new StructType().add("key", IntegerType).add("value", StringType) + session.read.schema(src1Schema).json(getFilePath("test-data/kv3.json")) + .createOrReplaceTempView("src1") + val srcpartSchema = new StructType().add("key", IntegerType).add("value", StringType) .add("ds", StringType).add("hr", StringType) session.read.schema(srcpartSchema).json(getFilePath("test-data/srcpart.json")) From 98f7f11a024f42381a144dfe9d59dc25383dc690 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sat, 13 Aug 2016 17:34:35 -0700 Subject: [PATCH 07/27] added meaningful comments --- .../test/resources/sql-tests/inputs/join.sql | 94 +++++++++---------- 1 file changed, 47 insertions(+), 47 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/join.sql b/sql/core/src/test/resources/sql-tests/inputs/join.sql index 72f7d06295557..81954fd6f2050 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/join.sql @@ 
-1,4 +1,4 @@ --- self-join (auto_join0.q) +-- join nested table expressions (auto_join0.q) SELECT a.k1, a.v1, a.k2, a.v2 FROM ( SELECT src1.key as k1, src1.value as v1, @@ -9,20 +9,21 @@ SELECT src1.key as k1, src1.value as v1, SORT BY k1, v1, k2, v2 ) a; --- self-join (auto_join1.q) +-- self-join + join condition (auto_join1.q) SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key); --- self-join (auto_join2.q) +-- equi inner join + inner join with a complex join condition (auto_join2.q) SELECT src1.key, src3.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key); --- self-join (auto_join3.q) +-- equi inner join + equi inner join (auto_join3.q) SELECT src1.key, src3.value -FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key); +FROM src src1 JOIN src src2 ON (src1.key = src2.key) + JOIN src src3 ON (src1.key = src3.key); --- left-outer join (auto_join4.q) +-- left-outer join over two nested table expressions (auto_join4.q) SELECT c.c1, c.c2, c.c3, c.c4 FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 @@ -37,7 +38,7 @@ FROM ( ON (a.c1 = b.c3) ) c; --- right-outer join (auto_join5.q) +-- right-outer join over two nested table expressions (auto_join5.q) SELECT c.c1, c.c2, c.c3, c.c4 FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 @@ -52,7 +53,7 @@ FROM ( ON (a.c1 = b.c3) ) c; --- full-outer join (auto_join6.q) +-- full-outer join over two nested table expressions (auto_join6.q) SELECT c.c1, c.c2, c.c3, c.c4 FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 @@ -67,7 +68,7 @@ FROM ( ON (a.c1 = b.c3) ) c; --- full-outer join + left-outer join (auto_join7.q) +-- full-outer join + left-outer join over nested table expressions (auto_join7.q) SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 @@ -87,7 +88,7 @@ FROM ( ON (a.c1 = c.c5) ) c; --- 
left-outer join (auto_join8.q) +-- left-outer join + join condition + filter (auto_join8.q) SELECT c.c1, c.c2, c.c3, c.c4 FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 @@ -103,12 +104,12 @@ FROM ( ) c where c.c3 IS NULL AND c.c1 IS NOT NULL; --- join (auto_join9.q) +-- inner join + join condition + filter (auto_join9.q) SELECT src1.key, src2.value FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) WHERE src1.ds = '2008-04-08' and src1.hr = '12'; --- self-join (auto_join10.q) +-- equi inner join + star expansion in nested table expression (auto_join10.q) FROM (SELECT src.* FROM src) x JOIN @@ -116,7 +117,7 @@ JOIN ON (x.key = Y.key) select Y.key, Y.value; --- self-join (auto_join11.q) +-- inner join with a complex join condition over nested table expressions (auto_join11.q) SELECT src1.c1, src2.c4 FROM (SELECT src.key as c1, src.value as c2 from src) src1 @@ -124,7 +125,7 @@ FROM (SELECT src.key as c3, src.value as c4 from src) src2 ON src1.c1 = src2.c3 AND src1.c1 < 200; --- join (auto_join12.q) +-- two inner joins with complex join conditions over nested table expressions (auto_join12.q) SELECT src1.c1, src2.c4 FROM (SELECT src.key as c1, src.value as c2 from src) src1 @@ -135,7 +136,7 @@ FROM (SELECT src.key as c5, src.value as c6 from src) src3 ON src1.c1 = src3.c5 AND src3.c5 < 100; --- join (auto_join13.q) +-- two inner joins with complex join conditions over nested table expressions (auto_join13.q) SELECT src1.c1, src2.c4 FROM (SELECT src.key as c1, src.value as c2 from src) src1 @@ -146,11 +147,11 @@ FROM (SELECT src.key as c5, src.value as c6 from src) src3 ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 400; --- join (auto_join14.q) +-- join two different tables (auto_join14.q) FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 200 SELECT src.key, srcpart.value; --- join (auto_join15.q) +-- join + sort by (auto_join15.q) SELECT a.k1, a.v1, a.k2, a.v2 FROM ( SELECT src1.key as k1, src1.value as v1,
src2.key as k2, src2.value as v2 @@ -158,7 +159,7 @@ SELECT a.k1, a.v1, a.k2, a.v2 SORT BY k1, v1, k2, v2 ) a; --- join (auto_join16.q) +-- inner join with a filter above join and a filter below join (auto_join16.q) SELECT subq.key, tab.value FROM (select a.key, a.value from src a where a.key > 100 ) subq @@ -166,11 +167,11 @@ JOIN src tab ON (subq.key = tab.key and subq.key > 150 and subq.value = tab.value) where tab.key < 200; --- join (auto_join17.q) +-- star expansion in nested table expression (auto_join17.q) SELECT src1.*, src2.* FROM src src1 JOIN src src2 ON (src1.key = src2.key); --- join (auto_join18.q) +-- full outer join over Aggregate (auto_join18.q) SELECT a.key, a.value, b.key, b.value FROM ( @@ -183,7 +184,7 @@ FROM ) b ON (a.key = b.key); --- join (auto_join18_multi_distinct.q) +-- full outer join + multi distinct (auto_join18_multi_distinct.q) SELECT a.key, a.value, b.key, b.value1, b.value2 FROM ( @@ -197,12 +198,12 @@ FROM ) b ON (a.key = b.key); --- join (auto_join19.q) +-- join + disjunctive conditions (auto_join19.q) SELECT src1.key, src2.value FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11'); --- join (auto_join20.q) +-- inner join + right-outer join #1 (auto_join20.q) SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 FROM ( SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 @@ -211,7 +212,7 @@ FROM ( SORT BY k1,v1,k2,v2,k3,v3 )a; --- join (auto_join20.q) +-- inner join + right-outer join #2 (auto_join20.q) SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 FROM ( SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 @@ -220,7 +221,7 @@ FROM ( SORT BY k1,v1,k2,v2,k3,v3 )a; --- join (auto_join21.q) +-- left outer join + right outer join (auto_join21.q) SELECT * FROM src src1 @@ -228,7 +229,7 @@ FROM RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND 
src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; --- join (auto_join22.q) +-- nested join (auto_join22.q) SELECT src5.src1_value FROM (SELECT src3.*, src4.value as src4_value, src4.key as src4_key @@ -238,7 +239,7 @@ FROM JOIN src src2 ON src1.key = src2.key) src3 ON src3.src1_key = src4.key) src5; --- join (auto_join23.q) +-- Cartesian join (auto_join23.q) SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 200 and src2.key < 200 SORT BY src1.key, src1.value, src2.key, src2.value; @@ -247,10 +248,10 @@ SORT BY src1.key, src1.value, src2.key, src2.value; WITH tst1 AS (SELECT a.key, count(1) as cnt FROM src a group by a.key) SELECT sum(a.cnt) FROM tst1 a JOIN tst1 b ON a.key = b.key; --- join (auto_join26.q) +-- aggregate over join results (auto_join26.q) SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key order by x.key; --- join (auto_join27.q) +-- join over set operation over aggregate (auto_join27.q) SELECT count(1) FROM ( @@ -264,55 +265,55 @@ JOIN ) src3 ON src_12.key = src3.k AND src3.k < 300; --- join (auto_join28.q) +-- left outer + right outer (auto_join28.q) SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; --- join (auto_join28.q) +-- right outer + right outer (auto_join28.q) SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; --- join (auto_join28.q) +-- left outer + left outer (auto_join28.q) SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY 
src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; --- join (auto_join28.q) +-- right outer + left outer (auto_join28.q) SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; --- join (auto_join29.q) +-- inner + left outer (auto_join29.q) SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; --- join (auto_join29.q) +-- inner + right outer (auto_join29.q) SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; --- join (auto_join29.q) +-- left outer + inner (auto_join29.q) SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; --- join (auto_join29.q) +-- right outer + inner (auto_join29.q) SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; --- join (auto_join30.q) +-- inner join with sorted by nested table expression (auto_join30.q) FROM (SELECT src.* FROM src sort by key) x JOIN @@ -320,7 +321,7 @@ JOIN ON (x.key = Y.key) select Y.key,Y.value; --- join (auto_join30.q) +-- left outer join with sorted by nested table expression (auto_join30.q) FROM (SELECT src.* FROM src sort by key) x LEFT OUTER JOIN @@ -328,7
+329,7 @@ LEFT OUTER JOIN ON (x.key = Y.key) select Y.key,Y.value; --- join (auto_join30.q) +-- right outer join with sorted by nested table expression (auto_join30.q) FROM (SELECT src.* FROM src sort by key) x RIGHT OUTER JOIN @@ -336,7 +337,7 @@ RIGHT OUTER JOIN ON (x.key = Y.key) select Y.key,Y.value; --- join (auto_join30.q) +-- inner + inner with sorted by nested table expression (auto_join30.q) FROM (SELECT src.* FROM src sort by key) x JOIN @@ -347,7 +348,7 @@ JOIN ON (x.key = Z.key) select Y.key,Y.value; --- join (auto_join30.q) +-- inner + left outer with sorted by nested table expression (auto_join30.q) FROM (SELECT src.* FROM src sort by key) x JOIN @@ -358,7 +359,7 @@ LEFT OUTER JOIN ON (x.key = Z.key) select Y.key,Y.value; --- join (auto_join30.q) +-- left + left outer with sorted by nested table expression (auto_join30.q) FROM (SELECT src.* FROM src sort by key) x LEFT OUTER JOIN @@ -369,7 +370,7 @@ LEFT OUTER JOIN ON (x.key = Z.key) select Y.key,Y.value; --- join (auto_join30.q) +-- left + right outer with sorted by nested table expression (auto_join30.q) FROM (SELECT src.* FROM src sort by key) x LEFT OUTER JOIN @@ -380,7 +381,7 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select Y.key,Y.value; --- join (auto_join30.q) +-- right + right outer with sorted by nested table expression (auto_join30.q) FROM (SELECT src.* FROM src sort by key) x RIGHT OUTER JOIN @@ -391,7 +392,7 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select Y.key,Y.value; --- join (auto_join31.q) +-- right outer + inner with sorted by nested table expression (auto_join30.q) FROM (SELECT src.* FROM src sort by key) x RIGHT OUTER JOIN @@ -401,4 +402,3 @@ JOIN (SELECT src.* FROM src sort by value) Z ON (x.key = Z.key) select Y.key,Y.value; - From a26bdebd98f3a4d496211c3a3a54e6036457ddf3 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sat, 13 Aug 2016 17:35:49 -0700 Subject: [PATCH 08/27] added meaningful comments --- sql/core/src/test/resources/sql-tests/results/join.sql.out | 3 ++- 1 file changed, 
2 insertions(+), 1 deletion(-) diff --git a/sql/core/src/test/resources/sql-tests/results/join.sql.out b/sql/core/src/test/resources/sql-tests/results/join.sql.out index f20f3e03b8ea1..d686730ff3806 100644 --- a/sql/core/src/test/resources/sql-tests/results/join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/join.sql.out @@ -56,7 +56,8 @@ struct -- !query 3 SELECT src1.key, src3.value -FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key) +FROM src src1 JOIN src src2 ON (src1.key = src2.key) + JOIN src src3 ON (src1.key = src3.key) -- !query 3 schema struct -- !query 3 output From cdea1a3ab334fc264be16f81b3f94102111c0489 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sat, 13 Aug 2016 17:36:43 -0700 Subject: [PATCH 09/27] added meaningful comments --- sql/core/src/test/resources/sql-tests/inputs/join.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/join.sql b/sql/core/src/test/resources/sql-tests/inputs/join.sql index 81954fd6f2050..103193a5cface 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/join.sql @@ -392,7 +392,7 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select Y.key,Y.value; --- right outer + inner with sorted by nested table expression (auto_join30.q) +-- right outer + inner with sorted by nested table expression (auto_join31.q) FROM (SELECT src.* FROM src sort by key) x RIGHT OUTER JOIN From 9c67e69c82423c8eb009b93c9f790c41703902fb Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sat, 13 Aug 2016 23:38:02 -0700 Subject: [PATCH 10/27] batch 3 --- .../test/resources/sql-tests/inputs/join.sql | 58 +++++- .../resources/sql-tests/results/join.sql.out | 169 ++++++++++++++++- .../org/apache/spark/sql/SQLQuerySuite.scala | 170 ------------------ .../apache/spark/sql/SQLQueryTestSuite.scala | 12 ++ 4 files changed, 237 insertions(+), 172 deletions(-) diff --git 
a/sql/core/src/test/resources/sql-tests/inputs/join.sql b/sql/core/src/test/resources/sql-tests/inputs/join.sql index 103193a5cface..22ea110f73500 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/join.sql @@ -109,7 +109,7 @@ SELECT src1.key, src2.value FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) WHERE src1.ds = '2008-04-08' and src1.hr = '12'; --- equi inner join + star expansion in nested table expression (auto_join10.q) +-- equi inner join + table.star expansion in nested table expression (auto_join10.q) FROM (SELECT src.* FROM src) x JOIN @@ -402,3 +402,59 @@ JOIN (SELECT src.* FROM src sort by value) Z ON (x.key = Z.key) select Y.key,Y.value; + +-- self join with aliases +SELECT x.key, COUNT(*) +FROM src x JOIN src y ON x.key = y.key +GROUP BY x.key; + +-- left semi greater than predicate +SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.a >= y.a + 2; + +-- left semi greater than predicate and equal operator #1 +SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.b = y.b and x.a >= y.a + 2; + +-- left semi greater than predicate and equal operator #2 +SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.b = y.a and x.a >= y.b + 1; + +-- inner join with one-match-per-row filtering predicates (where) +SELECT * FROM uppercasedata u JOIN lowercasedata l WHERE u.n = l.N; + +-- inner join with one-match-per-row join conditions (on) +SELECT * FROM uppercasedata u JOIN lowercasedata l ON u.n = l.N; + +-- inner join with multiple-match-per-row filtering predicates (where) +SELECT * FROM + (SELECT * FROM testdata2 WHERE a = 1) x JOIN + (SELECT * FROM testdata2 WHERE a = 1) y +WHERE x.a = y.a; + +-- inner join with no-match-per-row filtering predicates (where) +SELECT * FROM + (SELECT * FROM testData2 WHERE a = 1) x JOIN + (SELECT * FROM testData2 WHERE a = 2) y +WHERE x.a = y.a; + +-- basic full outer join +SELECT * FROM + (SELECT * FROM upperCaseData WHERE N <= 4) 
leftTable FULL OUTER JOIN + (SELECT * FROM upperCaseData WHERE N >= 3) rightTable + ON leftTable.N = rightTable.N; + +-- basic right outer join +SELECT * FROM lowercasedata l RIGHT OUTER JOIN uppercasedata u ON l.n = u.N; + +-- basic left outer join +SELECT * FROM uppercasedata u LEFT OUTER JOIN lowercasedata l ON l.n = u.N; + +-- inner join ON with table name as qualifier +SELECT * FROM upperCaseData JOIN lowerCaseData ON lowerCaseData.n = upperCaseData.N; + +-- qualified select with inner join ON with table name as qualifier +SELECT upperCaseData.N, upperCaseData.L FROM upperCaseData JOIN lowerCaseData + ON lowerCaseData.n = upperCaseData.N; + +-- SPARK-4120 Join of multiple tables does not work in SparkSQL +SELECT a.key, b.key, c.key +FROM testData a,testData b,testData c +where a.key = b.key and a.key = c.key and a.key < 5; diff --git a/sql/core/src/test/resources/sql-tests/results/join.sql.out b/sql/core/src/test/resources/sql-tests/results/join.sql.out index d686730ff3806..aac79624c9294 100644 --- a/sql/core/src/test/resources/sql-tests/results/join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/join.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 46 +-- Number of queries: 60 -- !query 0 @@ -890,3 +890,170 @@ struct 251 val_251 330 val_330 86 val_86 + + +-- !query 46 +SELECT x.key, COUNT(*) +FROM src x JOIN src y ON x.key = y.key +GROUP BY x.key +-- !query 46 schema +struct +-- !query 46 output +165 4 +251 1 +330 1 +86 1 + + +-- !query 47 +SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.a >= y.a + 2 +-- !query 47 schema +struct +-- !query 47 output +3 1 +3 2 + + +-- !query 48 +SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.b = y.b and x.a >= y.a + 2 +-- !query 48 schema +struct +-- !query 48 output +3 1 +3 2 + + +-- !query 49 +SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.b = y.a and x.a >= y.b + 1 +-- !query 49 schema +struct +-- !query 49 output +2 1 +2 2 +3 1 +3 2 
+ + +-- !query 50 +SELECT * FROM uppercasedata u JOIN lowercasedata l WHERE u.n = l.N +-- !query 50 schema +struct +-- !query 50 output +1 A 1 a +2 B 2 b +3 C 3 c +4 D 4 d + + +-- !query 51 +SELECT * FROM uppercasedata u JOIN lowercasedata l ON u.n = l.N +-- !query 51 schema +struct +-- !query 51 output +1 A 1 a +2 B 2 b +3 C 3 c +4 D 4 d + + +-- !query 52 +SELECT * FROM + (SELECT * FROM testdata2 WHERE a = 1) x JOIN + (SELECT * FROM testdata2 WHERE a = 1) y +WHERE x.a = y.a +-- !query 52 schema +struct +-- !query 52 output +1 1 1 1 +1 1 1 2 +1 2 1 1 +1 2 1 2 + + +-- !query 53 +SELECT * FROM + (SELECT * FROM testData2 WHERE a = 1) x JOIN + (SELECT * FROM testData2 WHERE a = 2) y +WHERE x.a = y.a +-- !query 53 schema +struct +-- !query 53 output + + + +-- !query 54 +SELECT * FROM + (SELECT * FROM upperCaseData WHERE N <= 4) leftTable FULL OUTER JOIN + (SELECT * FROM upperCaseData WHERE N >= 3) rightTable + ON leftTable.N = rightTable.N +-- !query 54 schema +struct +-- !query 54 output +1 A NULL NULL +2 B NULL NULL +3 C 3 C +4 D 4 D +NULL NULL 5 E +NULL NULL 6 F + + +-- !query 55 +SELECT * FROM lowercasedata l RIGHT OUTER JOIN uppercasedata u ON l.n = u.N +-- !query 55 schema +struct +-- !query 55 output +1 a 1 A +2 b 2 B +3 c 3 C +4 d 4 D +NULL NULL 5 E +NULL NULL 6 F + + +-- !query 56 +SELECT * FROM uppercasedata u LEFT OUTER JOIN lowercasedata l ON l.n = u.N +-- !query 56 schema +struct +-- !query 56 output +1 A 1 a +2 B 2 b +3 C 3 c +4 D 4 d +5 E NULL NULL +6 F NULL NULL + + +-- !query 57 +SELECT * FROM upperCaseData JOIN lowerCaseData ON lowerCaseData.n = upperCaseData.N +-- !query 57 schema +struct +-- !query 57 output +1 A 1 a +2 B 2 b +3 C 3 c +4 D 4 d + + +-- !query 58 +SELECT upperCaseData.N, upperCaseData.L FROM upperCaseData JOIN lowerCaseData + ON lowerCaseData.n = upperCaseData.N +-- !query 58 schema +struct +-- !query 58 output +1 A +2 B +3 C +4 D + + +-- !query 59 +SELECT a.key, b.key, c.key +FROM testData a,testData b,testData c +where a.key = b.key 
and a.key = c.key and a.key < 5 +-- !query 59 schema +struct +-- !query 59 output +1 1 1 +2 2 2 +3 3 3 +4 4 4 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 4fcde58833d76..e936cdf0843ba 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -126,19 +126,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } } - test("self join with aliases") { - Seq(1, 2, 3).map(i => (i, i.toString)).toDF("int", "str").createOrReplaceTempView("df") - - checkAnswer( - sql( - """ - |SELECT x.str, COUNT(*) - |FROM df x JOIN df y ON x.str = y.str - |GROUP BY x.str - """.stripMargin), - Row("1", 1) :: Row("2", 1) :: Row("3", 1) :: Nil) - } - test("support table.star") { checkAnswer( sql( @@ -451,27 +438,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { arrayData.map(d => Row(d.data, d.data(0), d.data(0) + d.data(1), d.data(1))).collect()) } - test("left semi greater than predicate") { - withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "true") { - checkAnswer( - sql("SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.a >= y.a + 2"), - Seq(Row(3, 1), Row(3, 2)) - ) - } - } - - test("left semi greater than predicate and equal operator") { - checkAnswer( - sql("SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.b = y.b and x.a >= y.a + 2"), - Seq(Row(3, 1), Row(3, 2)) - ) - - checkAnswer( - sql("SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.b = y.a and x.a >= y.b + 1"), - Seq(Row(2, 1), Row(2, 2), Row(3, 1), Row(3, 2)) - ) - } - test("index into array of arrays") { checkAnswer( sql( @@ -670,57 +636,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } } - test("inner join where, one match per row") { - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - checkAnswer( - sql("SELECT * FROM uppercasedata JOIN lowercasedata 
WHERE n = N"), - Seq( - Row(1, "A", 1, "a"), - Row(2, "B", 2, "b"), - Row(3, "C", 3, "c"), - Row(4, "D", 4, "d"))) - } - } - - test("inner join ON, one match per row") { - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - checkAnswer( - sql("SELECT * FROM uppercasedata JOIN lowercasedata ON n = N"), - Seq( - Row(1, "A", 1, "a"), - Row(2, "B", 2, "b"), - Row(3, "C", 3, "c"), - Row(4, "D", 4, "d"))) - } - } - - test("inner join, where, multiple matches") { - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - checkAnswer( - sql( - """ - |SELECT * FROM - | (SELECT * FROM testdata2 WHERE a = 1) x JOIN - | (SELECT * FROM testdata2 WHERE a = 1) y - |WHERE x.a = y.a""".stripMargin), - Row(1, 1, 1, 1) :: - Row(1, 1, 1, 2) :: - Row(1, 2, 1, 1) :: - Row(1, 2, 1, 2) :: Nil) - } - } - - test("inner join, no matches") { - checkAnswer( - sql( - """ - |SELECT * FROM - | (SELECT * FROM testData2 WHERE a = 1) x JOIN - | (SELECT * FROM testData2 WHERE a = 2) y - |WHERE x.a = y.a""".stripMargin), - Nil) - } - test("big inner join, 4 matches per row") { checkAnswer( sql( @@ -750,49 +665,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } } - test("left outer join") { - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - checkAnswer( - sql("SELECT * FROM uppercasedata LEFT OUTER JOIN lowercasedata ON n = N"), - Row(1, "A", 1, "a") :: - Row(2, "B", 2, "b") :: - Row(3, "C", 3, "c") :: - Row(4, "D", 4, "d") :: - Row(5, "E", null, null) :: - Row(6, "F", null, null) :: Nil) - } - } - - test("right outer join") { - withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") { - checkAnswer( - sql("SELECT * FROM lowercasedata RIGHT OUTER JOIN uppercasedata ON n = N"), - Row(1, "a", 1, "A") :: - Row(2, "b", 2, "B") :: - Row(3, "c", 3, "C") :: - Row(4, "d", 4, "D") :: - Row(null, null, 5, "E") :: - Row(null, null, 6, "F") :: Nil) - } - } - - test("full outer join") { - checkAnswer( - sql( - """ - |SELECT * FROM - | (SELECT * FROM upperCaseData WHERE N <= 4) leftTable FULL OUTER 
JOIN - | (SELECT * FROM upperCaseData WHERE N >= 3) rightTable - | ON leftTable.N = rightTable.N - """.stripMargin), - Row(1, "A", null, null) :: - Row(2, "B", null, null) :: - Row(3, "C", 3, "C") :: - Row (4, "D", 4, "D") :: - Row(null, null, 5, "E") :: - Row(null, null, 6, "F") :: Nil) - } - test("SPARK-11111 null-safe join should not use cartesian product") { val df = sql("select count(*) from testData a join testData b on (a.key <=> b.key)") val cp = df.queryExecution.sparkPlan.collect { @@ -847,27 +719,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { Row("1")) } - test("inner join ON with table name as qualifier") { - checkAnswer( - sql("SELECT * FROM upperCaseData JOIN lowerCaseData ON lowerCaseData.n = upperCaseData.N"), - Seq( - Row(1, "A", 1, "a"), - Row(2, "B", 2, "b"), - Row(3, "C", 3, "c"), - Row(4, "D", 4, "d"))) - } - - test("qualified select with inner join ON with table name as qualifier") { - checkAnswer( - sql("SELECT upperCaseData.N, upperCaseData.L FROM upperCaseData JOIN lowerCaseData " + - "ON lowerCaseData.n = upperCaseData.N"), - Seq( - Row(1, "A"), - Row(2, "B"), - Row(3, "C"), - Row(4, "D"))) - } - test("system function upper()") { checkAnswer( sql("SELECT n,UPPER(l) FROM lowerCaseData"), @@ -1202,17 +1053,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } } - test("Multiple join") { - checkAnswer( - sql( - """SELECT a.key, b.key, c.key - |FROM testData a - |JOIN testData b ON a.key = b.key - |JOIN testData c ON a.key = c.key - """.stripMargin), - (1 to 100).map(i => Row(i, i, i))) - } - test("SPARK-3483 Special chars in column names") { val data = sparkContext.parallelize( Seq("""{"key?number1": "value1", "key.number2": "value2"}""")) @@ -1236,16 +1076,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { checkAnswer(sql("SELECT ~key FROM testData WHERE key = 1 "), Row(-2)) } - test("SPARK-4120 Join of multiple tables does not work in SparkSQL") { - checkAnswer( - sql( - """SELECT a.key, 
b.key, c.key - |FROM testData a,testData b,testData c - |where a.key = b.key and a.key = c.key - """.stripMargin), - (1 to 100).map(i => Row(i, i, i))) - } - test("SPARK-4154 Query does not work if it has 'not between' in Spark SQL and HQL") { checkAnswer(sql("SELECT key FROM testData WHERE key not between 0 and 10 order by key"), (11 to 100).map(i => Row(i))) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index f7f3a6473e460..4dccbd89e258f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -239,6 +239,10 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { (1 to 100).map(i => (i, i.toString)).toDF("key", "value").createOrReplaceTempView("testdata") + Seq((1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)) + .toDF("a", "b") + .createOrReplaceTempView("testData2") + ((Seq(1, 2, 3), Seq(Seq(1, 2, 3))) :: (Seq(2, 3, 4), Seq(Seq(2, 3, 4))) :: Nil) .toDF("arraycol", "nestedarraycol") .createOrReplaceTempView("arraydata") @@ -251,6 +255,14 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { .toDF("mapcol") .createOrReplaceTempView("mapdata") + Seq((1, "a"), (2, "b"), (3, "c"), (4, "d")) + .toDF("n", "l") + .createOrReplaceTempView("lowerCaseData") + + Seq((1, "A"), (2, "B"), (3, "C"), (4, "D"), (5, "E"), (6, "F")) + .toDF("N", "L") + .createOrReplaceTempView("upperCaseData") + val srcSchema = new StructType().add("key", IntegerType).add("value", StringType) session.read.schema(srcSchema).json(getFilePath("test-data/kv1.json")) .createOrReplaceTempView("src") From 5bd85c2099384a8dc7d909fb7e5bd92e98bfa20a Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sun, 14 Aug 2016 09:01:41 -0700 Subject: [PATCH 11/27] two more cases --- .../test/resources/sql-tests/inputs/join.sql | 18 +++ .../resources/sql-tests/results/join.sql.out | 121 
++++++++++++------ 2 files changed, 97 insertions(+), 42 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/join.sql b/sql/core/src/test/resources/sql-tests/inputs/join.sql index 22ea110f73500..9e27c1404447b 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/join.sql @@ -403,6 +403,24 @@ JOIN ON (x.key = Z.key) select Y.key,Y.value; +-- join over set operation (join34.q) +SELECT x.key, x.value, subq1.value +FROM +( SELECT x.key as key, x.value as value from src x where x.key < 200 + UNION ALL + SELECT x1.key as key, x1.value as value from src x1 where x1.key > 100 +) subq1 +JOIN src1 x ON (x.key = subq1.key); + +-- join over set operation over aggregate (join35.q) +SELECT x.key, x.value, subq1.cnt +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 200 group by x.key + UNION ALL + SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key +) subq1 +JOIN src1 x ON (x.key = subq1.key); + -- self join with aliases SELECT x.key, COUNT(*) FROM src x JOIN src y ON x.key = y.key diff --git a/sql/core/src/test/resources/sql-tests/results/join.sql.out b/sql/core/src/test/resources/sql-tests/results/join.sql.out index aac79624c9294..9b8f3c853efd5 100644 --- a/sql/core/src/test/resources/sql-tests/results/join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/join.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 60 +-- Number of queries: 62 -- !query 0 @@ -893,102 +893,139 @@ struct -- !query 46 +SELECT x.key, x.value, subq1.value +FROM +( SELECT x.key as key, x.value as value from src x where x.key < 20 + UNION ALL + SELECT x1.key as key, x1.value as value from src x1 where x1.key > 100 +) subq1 +JOIN src1 x ON (x.key = subq1.key) +-- !query 46 schema +struct +-- !query 46 output +165 NULL val_165 +165 NULL val_165 +165 val_165 val_165 +165 val_165 val_165 +330 val_330 val_330 + + +-- 
!query 47 +SELECT x.key, x.value, subq1.cnt +FROM +( SELECT x.key as key, count(1) as cnt from src x where x.key < 200 group by x.key + UNION ALL + SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key +) subq1 +JOIN src1 x ON (x.key = subq1.key) +-- !query 47 schema +struct +-- !query 47 output +165 NULL 2 +165 NULL 2 +165 val_165 2 +165 val_165 2 +330 val_330 1 +86 val_86 1 + + +-- !query 48 SELECT x.key, COUNT(*) FROM src x JOIN src y ON x.key = y.key GROUP BY x.key --- !query 46 schema +-- !query 48 schema struct --- !query 46 output +-- !query 48 output 165 4 251 1 330 1 86 1 --- !query 47 +-- !query 49 SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.a >= y.a + 2 --- !query 47 schema +-- !query 49 schema struct --- !query 47 output +-- !query 49 output 3 1 3 2 --- !query 48 +-- !query 50 SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.b = y.b and x.a >= y.a + 2 --- !query 48 schema +-- !query 50 schema struct --- !query 48 output +-- !query 50 output 3 1 3 2 --- !query 49 +-- !query 51 SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.b = y.a and x.a >= y.b + 1 --- !query 49 schema +-- !query 51 schema struct --- !query 49 output +-- !query 51 output 2 1 2 2 3 1 3 2 --- !query 50 +-- !query 52 SELECT * FROM uppercasedata u JOIN lowercasedata l WHERE u.n = l.N --- !query 50 schema +-- !query 52 schema struct --- !query 50 output +-- !query 52 output 1 A 1 a 2 B 2 b 3 C 3 c 4 D 4 d --- !query 51 +-- !query 53 SELECT * FROM uppercasedata u JOIN lowercasedata l ON u.n = l.N --- !query 51 schema +-- !query 53 schema struct --- !query 51 output +-- !query 53 output 1 A 1 a 2 B 2 b 3 C 3 c 4 D 4 d --- !query 52 +-- !query 54 SELECT * FROM (SELECT * FROM testdata2 WHERE a = 1) x JOIN (SELECT * FROM testdata2 WHERE a = 1) y WHERE x.a = y.a --- !query 52 schema +-- !query 54 schema struct --- !query 52 output +-- !query 54 output 1 1 1 1 1 1 1 2 1 2 1 1 1 2 1 2 --- !query 53 +-- !query 55 SELECT * FROM (SELECT * 
FROM testData2 WHERE a = 1) x JOIN (SELECT * FROM testData2 WHERE a = 2) y WHERE x.a = y.a --- !query 53 schema +-- !query 55 schema struct --- !query 53 output +-- !query 55 output --- !query 54 +-- !query 56 SELECT * FROM (SELECT * FROM upperCaseData WHERE N <= 4) leftTable FULL OUTER JOIN (SELECT * FROM upperCaseData WHERE N >= 3) rightTable ON leftTable.N = rightTable.N --- !query 54 schema +-- !query 56 schema struct --- !query 54 output +-- !query 56 output 1 A NULL NULL 2 B NULL NULL 3 C 3 C @@ -997,11 +1034,11 @@ NULL NULL 5 E NULL NULL 6 F --- !query 55 +-- !query 57 SELECT * FROM lowercasedata l RIGHT OUTER JOIN uppercasedata u ON l.n = u.N --- !query 55 schema +-- !query 57 schema struct --- !query 55 output +-- !query 57 output 1 a 1 A 2 b 2 B 3 c 3 C @@ -1010,11 +1047,11 @@ NULL NULL 5 E NULL NULL 6 F --- !query 56 +-- !query 58 SELECT * FROM uppercasedata u LEFT OUTER JOIN lowercasedata l ON l.n = u.N --- !query 56 schema +-- !query 58 schema struct --- !query 56 output +-- !query 58 output 1 A 1 a 2 B 2 b 3 C 3 c @@ -1023,36 +1060,36 @@ struct 6 F NULL NULL --- !query 57 +-- !query 59 SELECT * FROM upperCaseData JOIN lowerCaseData ON lowerCaseData.n = upperCaseData.N --- !query 57 schema +-- !query 59 schema struct --- !query 57 output +-- !query 59 output 1 A 1 a 2 B 2 b 3 C 3 c 4 D 4 d --- !query 58 +-- !query 60 SELECT upperCaseData.N, upperCaseData.L FROM upperCaseData JOIN lowerCaseData ON lowerCaseData.n = upperCaseData.N --- !query 58 schema +-- !query 60 schema struct --- !query 58 output +-- !query 60 output 1 A 2 B 3 C 4 D --- !query 59 +-- !query 61 SELECT a.key, b.key, c.key FROM testData a,testData b,testData c where a.key = b.key and a.key = c.key and a.key < 5 --- !query 59 schema +-- !query 61 schema struct --- !query 59 output +-- !query 61 output 1 1 1 2 2 2 3 3 3 From 7bf97391a2850cf5e8c5a647ff70347ed8336f43 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Wed, 17 Aug 2016 09:29:55 -0700 Subject: [PATCH 12/27] code clean --- 
.../resources/sql-tests/results/join.sql.out | 7 ++++++- .../org/apache/spark/sql/SQLQueryTestSuite.scala | 16 ++++++++++++++-- .../org/apache/spark/sql/test/SQLTestData.scala | 1 + 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/results/join.sql.out b/sql/core/src/test/resources/sql-tests/results/join.sql.out index 9b8f3c853efd5..d97d3e3dc64a3 100644 --- a/sql/core/src/test/resources/sql-tests/results/join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/join.sql.out @@ -895,7 +895,7 @@ struct -- !query 46 SELECT x.key, x.value, subq1.value FROM -( SELECT x.key as key, x.value as value from src x where x.key < 20 +( SELECT x.key as key, x.value as value from src x where x.key < 200 UNION ALL SELECT x1.key as key, x1.value as value from src x1 where x1.key > 100 ) subq1 @@ -905,9 +905,14 @@ struct -- !query 46 output 165 NULL val_165 165 NULL val_165 +165 NULL val_165 +165 NULL val_165 +165 val_165 val_165 +165 val_165 val_165 165 val_165 val_165 165 val_165 val_165 330 val_330 val_330 +86 val_86 val_86 -- !query 47 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 4dccbd89e258f..0196d71130947 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -27,6 +27,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.util.{fileToString, stringToFile} import org.apache.spark.sql.test.SharedSQLContext +import org.apache.spark.sql.test.SQLTestData.NullKeyValuePairs import org.apache.spark.sql.types.{IntegerType, StringType, StructType} /** @@ -267,8 +268,19 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { session.read.schema(srcSchema).json(getFilePath("test-data/kv1.json")) 
.createOrReplaceTempView("src") - val src1Schema = new StructType().add("key", IntegerType).add("value", StringType) - session.read.schema(src1Schema).json(getFilePath("test-data/kv3.json")) + Seq((251, "val_251"), (86, "val_86"), (165, "val_165"), (330, "val_330"), (165, "val_165")) + .toDF("key", "value") + .createOrReplaceTempView("src") + + spark.sparkContext.parallelize( + Seq(NullKeyValuePairs(201, null), + NullKeyValuePairs(86, "val_86"), + NullKeyValuePairs(null, "val_null"), + NullKeyValuePairs(165, "val_165"), + NullKeyValuePairs(null, null), + NullKeyValuePairs(330, "val_330"), + NullKeyValuePairs(165, null)), 2) + .toDF("key", "value") .createOrReplaceTempView("src1") val srcpartSchema = new StructType().add("key", IntegerType).add("value", StringType) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala index 0cfe260e52152..deb092b16efef 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala @@ -305,6 +305,7 @@ private[sql] object SQLTestData { case class IntField(i: Int) case class NullInts(a: Integer) case class NullStrings(n: Int, s: String) + case class NullKeyValuePairs(key: Integer, value: String) case class TableName(tableName: String) case class Person(id: Int, name: String, age: Int) case class Salary(personId: Int, salary: Double) From 701bd742b8fbbc2f8d082f9b8873bab19672ae6b Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Wed, 17 Aug 2016 14:24:01 -0700 Subject: [PATCH 13/27] address comments --- .../src/test/resources/test-data/kv1.json | 5 ---- .../src/test/resources/test-data/kv3.json | 7 ----- .../src/test/resources/test-data/srcpart.json | 20 ------------- .../apache/spark/sql/SQLQueryTestSuite.scala | 28 ++++++++++++++----- 4 files changed, 21 insertions(+), 39 deletions(-) delete mode 100644 sql/core/src/test/resources/test-data/kv1.json 
delete mode 100644 sql/core/src/test/resources/test-data/kv3.json delete mode 100644 sql/core/src/test/resources/test-data/srcpart.json diff --git a/sql/core/src/test/resources/test-data/kv1.json b/sql/core/src/test/resources/test-data/kv1.json deleted file mode 100644 index 7bd430e36dba1..0000000000000 --- a/sql/core/src/test/resources/test-data/kv1.json +++ /dev/null @@ -1,5 +0,0 @@ -{"key":251,"value":"val_251"} -{"key":86,"value":"val_86"} -{"key":165,"value":"val_165"} -{"key":330,"value":"val_330"} -{"key":165,"value":"val_165"} diff --git a/sql/core/src/test/resources/test-data/kv3.json b/sql/core/src/test/resources/test-data/kv3.json deleted file mode 100644 index 9bb1555f7df02..0000000000000 --- a/sql/core/src/test/resources/test-data/kv3.json +++ /dev/null @@ -1,7 +0,0 @@ -{"key":201,"value":null} -{"key":86,"value":"val_86"} -{"key":null,"value":"val_null"} -{"key":165,"value":"val_165"} -{"key":null,"value":null} -{"key":330,"value":"val_330"} -{"key":165,"value":null} diff --git a/sql/core/src/test/resources/test-data/srcpart.json b/sql/core/src/test/resources/test-data/srcpart.json deleted file mode 100644 index aea3b4cfd7e3e..0000000000000 --- a/sql/core/src/test/resources/test-data/srcpart.json +++ /dev/null @@ -1,20 +0,0 @@ -{"key":251,"value":"val_251","ds":"2008-04-08","hr":"11"} -{"key":251,"value":"val_251","ds":"2008-04-09","hr":"11"} -{"key":251,"value":"val_251","ds":"2008-04-08","hr":"12"} -{"key":251,"value":"val_251","ds":"2008-04-09","hr":"12"} -{"key":86,"value":"val_86","ds":"2008-04-08","hr":"11"} -{"key":86,"value":"val_86","ds":"2008-04-09","hr":"11"} -{"key":86,"value":"val_86","ds":"2008-04-08","hr":"12"} -{"key":86,"value":"val_86","ds":"2008-04-09","hr":"12"} -{"key":165,"value":"val_165","ds":"2008-04-08","hr":"11"} -{"key":165,"value":"val_165","ds":"2008-04-09","hr":"11"} -{"key":165,"value":"val_165","ds":"2008-04-08","hr":"12"} -{"key":165,"value":"val_165","ds":"2008-04-09","hr":"12"} 
-{"key":330,"value":"val_330","ds":"2008-04-08","hr":"11"} -{"key":330,"value":"val_330","ds":"2008-04-09","hr":"11"} -{"key":330,"value":"val_330","ds":"2008-04-08","hr":"12"} -{"key":330,"value":"val_330","ds":"2008-04-09","hr":"12"} -{"key":165,"value":"val_165","ds":"2008-04-08","hr":"11"} -{"key":165,"value":"val_165","ds":"2008-04-09","hr":"11"} -{"key":165,"value":"val_165","ds":"2008-04-08","hr":"12"} -{"key":165,"value":"val_165","ds":"2008-04-09","hr":"12"} diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 0196d71130947..f80422970b524 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -264,10 +264,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { .toDF("N", "L") .createOrReplaceTempView("upperCaseData") - val srcSchema = new StructType().add("key", IntegerType).add("value", StringType) - session.read.schema(srcSchema).json(getFilePath("test-data/kv1.json")) - .createOrReplaceTempView("src") - Seq((251, "val_251"), (86, "val_86"), (165, "val_165"), (330, "val_330"), (165, "val_165")) .toDF("key", "value") .createOrReplaceTempView("src") @@ -283,9 +279,27 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { .toDF("key", "value") .createOrReplaceTempView("src1") - val srcpartSchema = new StructType().add("key", IntegerType).add("value", StringType) - .add("ds", StringType).add("hr", StringType) - session.read.schema(srcpartSchema).json(getFilePath("test-data/srcpart.json")) + Seq((251, "val_251", "2008-04-08", "11"), + (251, "val_251", "2008-04-09", "11"), + (251, "val_251", "2008-04-08", "12"), + (251, "val_251", "2008-04-09", "12"), + (86, "val_86", "2008-04-08", "11"), + (86, "val_86", "2008-04-09", "11"), + (86, "val_86", "2008-04-08", "12"), + (86, "val_86", "2008-04-09", "12"), + (165, "val_165", 
"2008-04-08", "11"), + (165, "val_165", "2008-04-09", "11"), + (165, "val_165", "2008-04-08", "12"), + (165, "val_165", "2008-04-09", "12"), + (330, "val_330", "2008-04-08", "11"), + (330, "val_330", "2008-04-09", "11"), + (330, "val_330", "2008-04-08", "12"), + (330, "val_330", "2008-04-09", "12"), + (165, "val_165", "2008-04-08", "11"), + (165, "val_165", "2008-04-09", "11"), + (165, "val_165", "2008-04-08", "12"), + (165, "val_165", "2008-04-09", "12")) + .toDF("key", "value", "ds", "hr") .createOrReplaceTempView("srcpart") } From 376e99adf737b0df4e338280ea168bc1d8c42958 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Wed, 17 Aug 2016 14:47:26 -0700 Subject: [PATCH 14/27] add using joins. --- .../resources/sql-tests/inputs/using-join.sql | 43 ++++++ .../sql-tests/results/using-join.sql.out | 132 ++++++++++++++++++ .../org/apache/spark/sql/SQLQuerySuite.scala | 44 ------ 3 files changed, 175 insertions(+), 44 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/using-join.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/using-join.sql.out diff --git a/sql/core/src/test/resources/sql-tests/inputs/using-join.sql b/sql/core/src/test/resources/sql-tests/inputs/using-join.sql new file mode 100644 index 0000000000000..eb4e6332965f4 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/using-join.sql @@ -0,0 +1,43 @@ +create temporary view ut1 as select * from values + ("r1c1", "r1c2", "t1r1c3"), + ("r2c1", "r2c2", "t1r2c3"), + ("r3c1x", "r3c2", "t1r3c3") + as ut1(c1, c2, c3); + +create temporary view ut2 as select * from values + ("r1c1", "r1c2", "t2r1c3"), + ("r2c1", "r2c2", "t2r2c3"), + ("r3c1y", "r3c2", "t2r3c3") + as ut2(c1, c2, c3); + +create temporary view ut3 as select * from values + (null, "r1c2", "t3r1c3"), + ("r2c1", "r2c2", "t3r2c3"), + ("r3c1y", "r3c2", "t3r3c3") + as ut3(c1, c2, c3); + +-- inner join with one using column +SELECT * FROM ut1 join ut2 using (c1); + +-- inner join with two using columns 
+SELECT * FROM ut1 join ut2 using (c1, c2); + +-- Left outer join with one using column. +SELECT * FROM ut1 left join ut2 using (c1); + +-- Right outer join with one using column. +SELECT * FROM ut1 right join ut2 using (c1); + +-- Full outer join with one using column. +SELECT * FROM ut1 full outer join ut2 using (c1); + +-- Full outer join with null value in join column. +SELECT * FROM ut1 full outer join ut3 using (c1); + +-- Self join with using columns. +SELECT * FROM ut1 join ut1 using (c1); + +-- clean up the temporary tables +drop view ut1; +drop view ut2; +drop view ut3; \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/results/using-join.sql.out b/sql/core/src/test/resources/sql-tests/results/using-join.sql.out new file mode 100644 index 0000000000000..09065a80461de --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/using-join.sql.out @@ -0,0 +1,132 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 13 + + +-- !query 0 +create temporary view ut1 as select * from values + ("r1c1", "r1c2", "t1r1c3"), + ("r2c1", "r2c2", "t1r2c3"), + ("r3c1x", "r3c2", "t1r3c3") + as ut1(c1, c2, c3) +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +create temporary view ut2 as select * from values + ("r1c1", "r1c2", "t2r1c3"), + ("r2c1", "r2c2", "t2r2c3"), + ("r3c1y", "r3c2", "t2r3c3") + as ut2(c1, c2, c3) +-- !query 1 schema +struct<> +-- !query 1 output + + + +-- !query 2 +create temporary view ut3 as select * from values + (null, "r1c2", "t3r1c3"), + ("r2c1", "r2c2", "t3r2c3"), + ("r3c1y", "r3c2", "t3r3c3") + as ut3(c1, c2, c3) +-- !query 2 schema +struct<> +-- !query 2 output +scala.MatchError +NullType (of class org.apache.spark.sql.types.NullType$) + + +-- !query 3 +SELECT * FROM ut1 join ut2 using (c1) +-- !query 3 schema +struct +-- !query 3 output +r1c1 r1c2 t1r1c3 r1c2 t2r1c3 +r2c1 r2c2 t1r2c3 r2c2 t2r2c3 + + +-- !query 4 +SELECT * FROM ut1 join ut2 using (c1, c2) +-- !query 4 
schema +struct +-- !query 4 output +r1c1 r1c2 t1r1c3 t2r1c3 +r2c1 r2c2 t1r2c3 t2r2c3 + + +-- !query 5 +SELECT * FROM ut1 left join ut2 using (c1) +-- !query 5 schema +struct +-- !query 5 output +r1c1 r1c2 t1r1c3 r1c2 t2r1c3 +r2c1 r2c2 t1r2c3 r2c2 t2r2c3 +r3c1x r3c2 t1r3c3 NULL NULL + + +-- !query 6 +SELECT * FROM ut1 right join ut2 using (c1) +-- !query 6 schema +struct +-- !query 6 output +r1c1 r1c2 t1r1c3 r1c2 t2r1c3 +r2c1 r2c2 t1r2c3 r2c2 t2r2c3 +r3c1y NULL NULL r3c2 t2r3c3 + + +-- !query 7 +SELECT * FROM ut1 full outer join ut2 using (c1) +-- !query 7 schema +struct +-- !query 7 output +r1c1 r1c2 t1r1c3 r1c2 t2r1c3 +r2c1 r2c2 t1r2c3 r2c2 t2r2c3 +r3c1x r3c2 t1r3c3 NULL NULL +r3c1y NULL NULL r3c2 t2r3c3 + + +-- !query 8 +SELECT * FROM ut1 full outer join ut3 using (c1) +-- !query 8 schema +struct<> +-- !query 8 output +org.apache.spark.sql.AnalysisException +Table or view not found: ut3; line 1 pos 34 + + +-- !query 9 +SELECT * FROM ut1 join ut1 using (c1) +-- !query 9 schema +struct +-- !query 9 output +r1c1 r1c2 t1r1c3 r1c2 t1r1c3 +r2c1 r2c2 t1r2c3 r2c2 t1r2c3 +r3c1x r3c2 t1r3c3 r3c2 t1r3c3 + + +-- !query 10 +drop view ut1 +-- !query 10 schema +struct<> +-- !query 10 output + + + +-- !query 11 +drop view ut2 +-- !query 11 schema +struct<> +-- !query 11 output + + + +-- !query 12 +drop view ut3 +-- !query 12 schema +struct<> +-- !query 12 output +org.apache.spark.sql.AnalysisException +View to drop '`ut3`' does not exist; diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index e936cdf0843ba..4e0f0c680c4db 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2052,48 +2052,11 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { test("join with using clause") { val df1 = Seq(("r1c1", "r1c2", "t1r1c3"), ("r2c1", "r2c2", "t1r2c3"), ("r3c1x", "r3c2", 
"t1r3c3")).toDF("c1", "c2", "c3") - val df2 = Seq(("r1c1", "r1c2", "t2r1c3"), - ("r2c1", "r2c2", "t2r2c3"), ("r3c1y", "r3c2", "t2r3c3")).toDF("c1", "c2", "c3") val df3 = Seq((null, "r1c2", "t3r1c3"), ("r2c1", "r2c2", "t3r2c3"), ("r3c1y", "r3c2", "t3r3c3")).toDF("c1", "c2", "c3") withTempView("t1", "t2", "t3") { df1.createOrReplaceTempView("t1") - df2.createOrReplaceTempView("t2") df3.createOrReplaceTempView("t3") - // inner join with one using column - checkAnswer( - sql("SELECT * FROM t1 join t2 using (c1)"), - Row("r1c1", "r1c2", "t1r1c3", "r1c2", "t2r1c3") :: - Row("r2c1", "r2c2", "t1r2c3", "r2c2", "t2r2c3") :: Nil) - - // inner join with two using columns - checkAnswer( - sql("SELECT * FROM t1 join t2 using (c1, c2)"), - Row("r1c1", "r1c2", "t1r1c3", "t2r1c3") :: - Row("r2c1", "r2c2", "t1r2c3", "t2r2c3") :: Nil) - - // Left outer join with one using column. - checkAnswer( - sql("SELECT * FROM t1 left join t2 using (c1)"), - Row("r1c1", "r1c2", "t1r1c3", "r1c2", "t2r1c3") :: - Row("r2c1", "r2c2", "t1r2c3", "r2c2", "t2r2c3") :: - Row("r3c1x", "r3c2", "t1r3c3", null, null) :: Nil) - - // Right outer join with one using column. - checkAnswer( - sql("SELECT * FROM t1 right join t2 using (c1)"), - Row("r1c1", "r1c2", "t1r1c3", "r1c2", "t2r1c3") :: - Row("r2c1", "r2c2", "t1r2c3", "r2c2", "t2r2c3") :: - Row("r3c1y", null, null, "r3c2", "t2r3c3") :: Nil) - - // Full outer join with one using column. - checkAnswer( - sql("SELECT * FROM t1 full outer join t2 using (c1)"), - Row("r1c1", "r1c2", "t1r1c3", "r1c2", "t2r1c3") :: - Row("r2c1", "r2c2", "t1r2c3", "r2c2", "t2r2c3") :: - Row("r3c1x", "r3c2", "t1r3c3", null, null) :: - Row("r3c1y", null, - null, "r3c2", "t2r3c3") :: Nil) // Full outer join with null value in join column. 
checkAnswer( @@ -2103,13 +2066,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { Row("r3c1x", "r3c2", "t1r3c3", null, null) :: Row("r3c1y", null, null, "r3c2", "t3r3c3") :: Row(null, null, null, "r1c2", "t3r1c3") :: Nil) - - // Self join with using columns. - checkAnswer( - sql("SELECT * FROM t1 join t1 using (c1)"), - Row("r1c1", "r1c2", "t1r1c3", "r1c2", "t1r1c3") :: - Row("r2c1", "r2c2", "t1r2c3", "r2c2", "t1r2c3") :: - Row("r3c1x", "r3c2", "t1r3c3", "r3c2", "t1r3c3") :: Nil) } } From 7c6f85ad95cdeb3f40e742b8047a28adf535e3f3 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Wed, 17 Aug 2016 15:56:22 -0700 Subject: [PATCH 15/27] using cast for null. --- .../resources/sql-tests/inputs/using-join.sql | 2 +- .../sql-tests/results/using-join.sql.out | 17 ++++++------ .../org/apache/spark/sql/SQLQuerySuite.scala | 20 -------------- .../apache/spark/sql/SQLQueryTestSuite.scala | 27 ++++++++++--------- .../apache/spark/sql/test/SQLTestData.scala | 1 - 5 files changed, 25 insertions(+), 42 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/using-join.sql b/sql/core/src/test/resources/sql-tests/inputs/using-join.sql index eb4e6332965f4..67284c20ea6e8 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/using-join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/using-join.sql @@ -11,7 +11,7 @@ create temporary view ut2 as select * from values as ut2(c1, c2, c3); create temporary view ut3 as select * from values - (null, "r1c2", "t3r1c3"), + (CAST(null as String), "r1c2", "t3r1c3"), ("r2c1", "r2c2", "t3r2c3"), ("r3c1y", "r3c2", "t3r3c3") as ut3(c1, c2, c3); diff --git a/sql/core/src/test/resources/sql-tests/results/using-join.sql.out b/sql/core/src/test/resources/sql-tests/results/using-join.sql.out index 09065a80461de..53930d628d817 100644 --- a/sql/core/src/test/resources/sql-tests/results/using-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/using-join.sql.out @@ -28,15 +28,14 @@ struct<> -- !query 2 create 
temporary view ut3 as select * from values - (null, "r1c2", "t3r1c3"), + (CAST(null as String), "r1c2", "t3r1c3"), ("r2c1", "r2c2", "t3r2c3"), ("r3c1y", "r3c2", "t3r3c3") as ut3(c1, c2, c3) -- !query 2 schema struct<> -- !query 2 output -scala.MatchError -NullType (of class org.apache.spark.sql.types.NullType$) + -- !query 3 @@ -91,10 +90,13 @@ r3c1y NULL NULL r3c2 t2r3c3 -- !query 8 SELECT * FROM ut1 full outer join ut3 using (c1) -- !query 8 schema -struct<> +struct -- !query 8 output -org.apache.spark.sql.AnalysisException -Table or view not found: ut3; line 1 pos 34 +NULL NULL NULL r1c2 t3r1c3 +r1c1 r1c2 t1r1c3 NULL NULL +r2c1 r2c2 t1r2c3 r2c2 t3r2c3 +r3c1x r3c2 t1r3c3 NULL NULL +r3c1y NULL NULL r3c2 t3r3c3 -- !query 9 @@ -128,5 +130,4 @@ drop view ut3 -- !query 12 schema struct<> -- !query 12 output -org.apache.spark.sql.AnalysisException -View to drop '`ut3`' does not exist; + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 4e0f0c680c4db..1705989216957 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -2049,26 +2049,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } } - test("join with using clause") { - val df1 = Seq(("r1c1", "r1c2", "t1r1c3"), - ("r2c1", "r2c2", "t1r2c3"), ("r3c1x", "r3c2", "t1r3c3")).toDF("c1", "c2", "c3") - val df3 = Seq((null, "r1c2", "t3r1c3"), - ("r2c1", "r2c2", "t3r2c3"), ("r3c1y", "r3c2", "t3r3c3")).toDF("c1", "c2", "c3") - withTempView("t1", "t2", "t3") { - df1.createOrReplaceTempView("t1") - df3.createOrReplaceTempView("t3") - - // Full outer join with null value in join column. 
- checkAnswer( - sql("SELECT * FROM t1 full outer join t3 using (c1)"), - Row("r1c1", "r1c2", "t1r1c3", null, null) :: - Row("r2c1", "r2c2", "t1r2c3", "r2c2", "t3r2c3") :: - Row("r3c1x", "r3c2", "t1r3c3", null, null) :: - Row("r3c1y", null, null, "r3c2", "t3r3c3") :: - Row(null, null, null, "r1c2", "t3r1c3") :: Nil) - } - } - test("SPARK-15327: fail to compile generated code with complex data structure") { withTempDir{ dir => val json = diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index f80422970b524..25dbee747f4a1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -27,8 +27,7 @@ import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules.RuleExecutor import org.apache.spark.sql.catalyst.util.{fileToString, stringToFile} import org.apache.spark.sql.test.SharedSQLContext -import org.apache.spark.sql.test.SQLTestData.NullKeyValuePairs -import org.apache.spark.sql.types.{IntegerType, StringType, StructType} +import org.apache.spark.sql.types.StructType /** * End-to-end test cases for SQL queries. 
@@ -268,16 +267,20 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { .toDF("key", "value") .createOrReplaceTempView("src") - spark.sparkContext.parallelize( - Seq(NullKeyValuePairs(201, null), - NullKeyValuePairs(86, "val_86"), - NullKeyValuePairs(null, "val_null"), - NullKeyValuePairs(165, "val_165"), - NullKeyValuePairs(null, null), - NullKeyValuePairs(330, "val_330"), - NullKeyValuePairs(165, null)), 2) - .toDF("key", "value") - .createOrReplaceTempView("src1") + session.sql( + """ + |CREATE OR REPLACE TEMPORARY VIEW src1 AS SELECT * FROM VALUES + |(201, CAST(null as String)), + |(86, "val_86"), + |(CAST(null as int), "val_null"), + |(165, "val_165"), + |(CAST(null as int), CAST(null as String)), + |(330, "val_330"), + |(165, CAST(null as String)) + |as src1(key, value) + """.stripMargin) + + session.sql("select * from src1").show(false) Seq((251, "val_251", "2008-04-08", "11"), (251, "val_251", "2008-04-09", "11"), diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala index deb092b16efef..0cfe260e52152 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestData.scala @@ -305,7 +305,6 @@ private[sql] object SQLTestData { case class IntField(i: Int) case class NullInts(a: Integer) case class NullStrings(n: Int, s: String) - case class NullKeyValuePairs(key: Integer, value: String) case class TableName(tableName: String) case class Person(id: Int, name: String, age: Int) case class Salary(personId: Int, salary: Double) From 4bd38d2610fad3adb58755f4564117f8be74fe1f Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Wed, 17 Aug 2016 16:23:38 -0700 Subject: [PATCH 16/27] move outer joins to outer-join.sql and move left-semi join to left-semi-join.sql --- .../test/resources/sql-tests/inputs/join.sql | 274 ------- .../sql-tests/inputs/left-semi-join.sql | 8 + 
.../resources/sql-tests/inputs/outer-join.sql | 265 +++++++ .../resources/sql-tests/results/join.sql.out | 739 ++---------------- .../sql-tests/results/left-semi-join.sql.out | 31 + .../sql-tests/results/outer-join.sql.out | 550 +++++++++++++ .../apache/spark/sql/SQLQueryTestSuite.scala | 2 - 7 files changed, 935 insertions(+), 934 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/left-semi-join.sql create mode 100644 sql/core/src/test/resources/sql-tests/inputs/outer-join.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/left-semi-join.sql.out create mode 100644 sql/core/src/test/resources/sql-tests/results/outer-join.sql.out diff --git a/sql/core/src/test/resources/sql-tests/inputs/join.sql b/sql/core/src/test/resources/sql-tests/inputs/join.sql index 9e27c1404447b..8f4fff2ca534c 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/join.sql @@ -23,87 +23,6 @@ SELECT src1.key, src3.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key); --- left-outer join over two nested table expressions (auto_join4.q) -SELECT c.c1, c.c2, c.c3, c.c4 -FROM ( - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 - FROM - ( - SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 - ) a - LEFT OUTER JOIN - ( - SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 - ) b - ON (a.c1 = b.c3) -) c; - --- right-outer join over two nested table expressions (auto_join5.q) -SELECT c.c1, c.c2, c.c3, c.c4 -FROM ( - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 - FROM - ( - SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 - ) a - RIGHT OUTER JOIN - ( - SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 - ) b - ON (a.c1 = b.c3) -) c; - --- full-outer join over two nested 
table expressions (auto_join6.q) -SELECT c.c1, c.c2, c.c3, c.c4 -FROM ( - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 - FROM - ( - SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 - ) a - FULL OUTER JOIN - ( - SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 - ) b - ON (a.c1 = b.c3) -) c; - --- full-outer join + left-outer join over nested table expressions (auto_join7.q) -SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 -FROM ( - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 - FROM - ( - SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 10 and src1.key < 150 - ) a - FULL OUTER JOIN - ( - SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 150 and src2.key < 300 - ) b - ON (a.c1 = b.c3) - LEFT OUTER JOIN - ( - SELECT src3.key AS c5, src3.value AS c6 FROM src src3 WHERE src3.key > 200 and src3.key < 400 - ) c - ON (a.c1 = c.c5) -) c; - --- left-outer join + join condition + filter (auto_join8.q) -SELECT c.c1, c.c2, c.c3, c.c4 -FROM ( - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 - FROM - ( - SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 - ) a - LEFT OUTER JOIN - ( - SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 - ) b - ON (a.c1 = b.c3) -) c -where c.c3 IS NULL AND c.c1 IS NOT NULL; - -- inner join + join condition + filter (auto_join9.q) SELECT src1.key, src2.value FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) @@ -171,64 +90,11 @@ where tab.key < 200; SELECT src1.*, src2.* FROM src src1 JOIN src src2 ON (src1.key = src2.key); --- full outer join over Aggregate (auto_join18.q) -SELECT a.key, a.value, b.key, b.value -FROM - ( - SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key - ) a - FULL OUTER JOIN - ( - SELECT src2.key as key, count(distinct(src2.value)) 
AS value - FROM src1 src2 group by src2.key - ) b -ON (a.key = b.key); - --- full outer join + multi distinct (auto_join18_multi_distinct.q) -SELECT a.key, a.value, b.key, b.value1, b.value2 -FROM - ( - SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key - ) a - FULL OUTER JOIN - ( - SELECT src2.key as key, count(distinct(src2.value)) AS value1, - count(distinct(src2.key)) AS value2 - FROM src1 src2 group by src2.key - ) b -ON (a.key = b.key); - -- join + disjunctive conditions (auto_join19.q) SELECT src1.key, src2.value FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11'); --- inner join + right-outer join #1 (auto_join20.q) -SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 -FROM ( - SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 - FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 200) - RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 300) - SORT BY k1,v1,k2,v2,k3,v3 -)a; - --- inner join + right-outer join #2 (auto_join20.q) -SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 -FROM ( - SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 - FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key < 100) - RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 300) - SORT BY k1,v1,k2,v2,k3,v3 -)a; - --- left outer join + right outer join (auto_join21.q) -SELECT * -FROM - src src1 - LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) -SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; - -- nested join (auto_join22.q) SELECT src5.src1_value FROM @@ -265,54 +131,6 @@ JOIN ) src3 ON src_12.key = src3.k AND src3.k < 300; --- left outer + right outer 
(auto_join28.q) -SELECT * FROM src src1 - LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) - SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; - --- right outer + right outer (auto_join28.q) -SELECT * FROM src src1 - RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) - SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; - --- left outer + left outer (auto_join28.q) -SELECT * FROM src src1 - LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) - SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; - --- right outer + left outer (auto_join28.q) -SELECT * FROM src src1 - RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) - SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; - --- inner + left outer (auto_join29.q) -SELECT * FROM src src1 - JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) - SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; - --- inner + right outer (auto_join29.q) -SELECT * FROM src src1 - JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) - SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; - --- left + inner outer (auto_join29.q) -SELECT * FROM src src1 - LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - JOIN src src3 ON (src2.key = src3.key AND src3.key < 
200) - SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; - --- right + inner join (auto_join29.q) -SELECT * FROM src src1 - RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) - SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; - -- inner join with sorted by nested table expression (auto_join30.q) FROM (SELECT src.* FROM src sort by key) x @@ -321,22 +139,6 @@ JOIN ON (x.key = Y.key) select Y.key,Y.value; --- left outer join with sorted by nested table expression (auto_join30.q) -FROM -(SELECT src.* FROM src sort by key) x -LEFT OUTER JOIN -(SELECT src.* FROM src sort by value) Y -ON (x.key = Y.key) -select Y.key,Y.value; - --- right outer join with sorted by nested table expression (auto_join30.q) -FROM -(SELECT src.* FROM src sort by key) x -RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Y -ON (x.key = Y.key) -select Y.key,Y.value; - -- inner + inner with sorted by nested table expression (auto_join30.q) FROM (SELECT src.* FROM src sort by key) x @@ -348,61 +150,6 @@ JOIN ON (x.key = Z.key) select Y.key,Y.value; --- inner + left outer with sorted by nested table expression (auto_join30.q) -FROM -(SELECT src.* FROM src sort by key) x -JOIN -(SELECT src.* FROM src sort by value) Y -ON (x.key = Y.key) -LEFT OUTER JOIN -(SELECT src.* FROM src sort by value) Z -ON (x.key = Z.key) -select Y.key,Y.value; - --- left + left outer with sorted by nested table expression (auto_join30.q) -FROM -(SELECT src.* FROM src sort by key) x -LEFT OUTER JOIN -(SELECT src.* FROM src sort by value) Y -ON (x.key = Y.key) -LEFT OUTER JOIN -(SELECT src.* FROM src sort by value) Z -ON (x.key = Z.key) -select Y.key,Y.value; - --- left + right outer with sorted by nested table expression (auto_join30.q) -FROM -(SELECT src.* FROM src sort by key) x -LEFT OUTER JOIN -(SELECT src.* FROM src sort by value) Y -ON (x.key = Y.key) -RIGHT OUTER JOIN 
-(SELECT src.* FROM src sort by value) Z -ON (x.key = Z.key) -select Y.key,Y.value; - --- right + right outer with sorted by nested table expression (auto_join30.q) -FROM -(SELECT src.* FROM src sort by key) x -RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Y -ON (x.key = Y.key) -RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Z -ON (x.key = Z.key) -select Y.key,Y.value; - --- right outer + inner with sorted by nested table expression (auto_join31.q) -FROM -(SELECT src.* FROM src sort by key) x -RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Y -ON (x.key = Y.key) -JOIN -(SELECT src.* FROM src sort by value) Z -ON (x.key = Z.key) -select Y.key,Y.value; - -- join over set operation (join34.q) SELECT x.key, x.value, subq1.value FROM @@ -426,15 +173,6 @@ SELECT x.key, COUNT(*) FROM src x JOIN src y ON x.key = y.key GROUP BY x.key; --- left semi greater than predicate -SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.a >= y.a + 2; - --- left semi greater than predicate and equal operator #1 -SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.b = y.b and x.a >= y.a + 2; - --- left semi greater than predicate and equal operator #2 -SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.b = y.a and x.a >= y.b + 1; - -- inner join with one-match-per-row filtering predicates (where) SELECT * FROM uppercasedata u JOIN lowercasedata l WHERE u.n = l.N; @@ -453,18 +191,6 @@ SELECT * FROM (SELECT * FROM testData2 WHERE a = 2) y WHERE x.a = y.a; --- basic full outer join -SELECT * FROM - (SELECT * FROM upperCaseData WHERE N <= 4) leftTable FULL OUTER JOIN - (SELECT * FROM upperCaseData WHERE N >= 3) rightTable - ON leftTable.N = rightTable.N; - --- basic right outer join -SELECT * FROM lowercasedata l RIGHT OUTER JOIN uppercasedata u ON l.n = u.N; - --- basic left outer join -SELECT * FROM uppercasedata u LEFT OUTER JOIN lowercasedata l ON l.n = u.N; - -- inner join ON with table name as qualifier SELECT * FROM upperCaseData JOIN 
lowerCaseData ON lowerCaseData.n = upperCaseData.N; diff --git a/sql/core/src/test/resources/sql-tests/inputs/left-semi-join.sql b/sql/core/src/test/resources/sql-tests/inputs/left-semi-join.sql new file mode 100644 index 0000000000000..19a6bfb9e7753 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/left-semi-join.sql @@ -0,0 +1,8 @@ +-- left semi greater than predicate +SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.a >= y.a + 2; + +-- left semi greater than predicate and equal operator #1 +SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.b = y.b and x.a >= y.a + 2; + +-- left semi greater than predicate and equal operator #2 +SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.b = y.a and x.a >= y.b + 1; \ No newline at end of file diff --git a/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql b/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql new file mode 100644 index 0000000000000..f9be371e4e54c --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql @@ -0,0 +1,265 @@ +-- basic full outer join +SELECT * FROM + (SELECT * FROM upperCaseData WHERE N <= 4) leftTable FULL OUTER JOIN + (SELECT * FROM upperCaseData WHERE N >= 3) rightTable + ON leftTable.N = rightTable.N; + +-- basic right outer join +SELECT * FROM lowercasedata l RIGHT OUTER JOIN uppercasedata u ON l.n = u.N; + +-- basic left outer join +SELECT * FROM uppercasedata u LEFT OUTER JOIN lowercasedata l ON l.n = u.N; + + +-- left-outer join over two nested table expressions (auto_join4.q) +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + ) a + LEFT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c; + +-- right-outer join over two nested table expressions (auto_join5.q) +SELECT 
c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + ) a + RIGHT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c; + +-- full-outer join over two nested table expressions (auto_join6.q) +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + ) a + FULL OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c; + +-- full-outer join + left-outer join over nested table expressions (auto_join7.q) +SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 10 and src1.key < 150 + ) a + FULL OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 150 and src2.key < 300 + ) b + ON (a.c1 = b.c3) + LEFT OUTER JOIN + ( + SELECT src3.key AS c5, src3.value AS c6 FROM src src3 WHERE src3.key > 200 and src3.key < 400 + ) c + ON (a.c1 = c.c5) +) c; + +-- left-outer join + join condition + filter (auto_join8.q) +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + ) a + LEFT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c +where c.c3 IS NULL AND c.c1 IS NOT NULL; + +-- full outer join over Aggregate (auto_join18.q) +SELECT a.key, a.value, b.key, b.value +FROM + ( + SELECT src1.key as key, 
count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value + FROM src1 src2 group by src2.key + ) b +ON (a.key = b.key); + +-- full outer join + multi distinct (auto_join18_multi_distinct.q) +SELECT a.key, a.value, b.key, b.value1, b.value2 +FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value1, + count(distinct(src2.key)) AS value2 + FROM src1 src2 group by src2.key + ) b +ON (a.key = b.key); + +-- inner join + right-outer join #1 (auto_join20.q) +SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 +FROM ( + SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 + FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 200) + RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 300) + SORT BY k1,v1,k2,v2,k3,v3 +)a; + +-- inner join + right-outer join #2 (auto_join20.q) +SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 +FROM ( + SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 + FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key < 100) + RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 300) + SORT BY k1,v1,k2,v2,k3,v3 +)a; + +-- left outer join + right outer join (auto_join21.q) +SELECT * +FROM + src src1 + LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) +SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- left outer + right outer (auto_join28.q) +SELECT * FROM src src1 + LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, 
src1.value, src2.key, src2.value, src3.key, src3.value; + +-- right outer + right outer (auto_join28.q) +SELECT * FROM src src1 + RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- left outer + left outer (auto_join28.q) +SELECT * FROM src src1 + LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- right outer + left outer (auto_join28.q) +SELECT * FROM src src1 + RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- inner + left outer (auto_join29.q) +SELECT * FROM src src1 + JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- inner + right outer (auto_join29.q) +SELECT * FROM src src1 + JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- left + inner outer (auto_join29.q) +SELECT * FROM src src1 + LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- right + inner join (auto_join29.q) +SELECT * FROM src src1 + RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND 
src2.key > 200) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; + +-- left outer join with sorted by nested table expression (auto_join30.q) +FROM +(SELECT src.* FROM src sort by key) x +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +select Y.key,Y.value; + +-- right outer join with sorted by nested table expression (auto_join30.q) +FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +select Y.key,Y.value; + +-- inner + left outer with sorted by nested table expression (auto_join30.q) +FROM +(SELECT src.* FROM src sort by key) x +JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value; + +-- left + left outer with sorted by nested table expression (auto_join30.q) +FROM +(SELECT src.* FROM src sort by key) x +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value; + +-- left + right outer with sorted by nested table expression (auto_join30.q) +FROM +(SELECT src.* FROM src sort by key) x +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value; + +-- right + right outer with sorted by nested table expression (auto_join30.q) +FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value; + +-- right outer + inner with sorted by nested table expression (auto_join31.q) +FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON 
(x.key = Y.key) +JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value; diff --git a/sql/core/src/test/resources/sql-tests/results/join.sql.out b/sql/core/src/test/resources/sql-tests/results/join.sql.out index d97d3e3dc64a3..0764862925f9f 100644 --- a/sql/core/src/test/resources/sql-tests/results/join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/join.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 62 +-- Number of queries: 31 -- !query 0 @@ -75,128 +75,12 @@ struct -- !query 4 -SELECT c.c1, c.c2, c.c3, c.c4 -FROM ( - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 - FROM - ( - SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 - ) a - LEFT OUTER JOIN - ( - SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 - ) b - ON (a.c1 = b.c3) -) c --- !query 4 schema -struct --- !query 4 output -165 val_165 NULL NULL -165 val_165 NULL NULL -251 val_251 251 val_251 - - --- !query 5 -SELECT c.c1, c.c2, c.c3, c.c4 -FROM ( - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 - FROM - ( - SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 - ) a - RIGHT OUTER JOIN - ( - SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 - ) b - ON (a.c1 = b.c3) -) c --- !query 5 schema -struct --- !query 5 output -251 val_251 251 val_251 -NULL NULL 330 val_330 - - --- !query 6 -SELECT c.c1, c.c2, c.c3, c.c4 -FROM ( - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 - FROM - ( - SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 - ) a - FULL OUTER JOIN - ( - SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 - ) b - ON (a.c1 = b.c3) -) c --- !query 6 schema -struct --- !query 6 output -165 val_165 NULL NULL -165 val_165 NULL NULL -251 
val_251 251 val_251 -NULL NULL 330 val_330 - - --- !query 7 -SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 -FROM ( - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 - FROM - ( - SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 10 and src1.key < 150 - ) a - FULL OUTER JOIN - ( - SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 150 and src2.key < 300 - ) b - ON (a.c1 = b.c3) - LEFT OUTER JOIN - ( - SELECT src3.key AS c5, src3.value AS c6 FROM src src3 WHERE src3.key > 200 and src3.key < 400 - ) c - ON (a.c1 = c.c5) -) c --- !query 7 schema -struct --- !query 7 output -86 val_86 NULL NULL NULL NULL -NULL NULL 165 val_165 NULL NULL -NULL NULL 165 val_165 NULL NULL -NULL NULL 251 val_251 NULL NULL - - --- !query 8 -SELECT c.c1, c.c2, c.c3, c.c4 -FROM ( - SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 - FROM - ( - SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 - ) a - LEFT OUTER JOIN - ( - SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 - ) b - ON (a.c1 = b.c3) -) c -where c.c3 IS NULL AND c.c1 IS NOT NULL --- !query 8 schema -struct --- !query 8 output -165 val_165 NULL NULL -165 val_165 NULL NULL - - --- !query 9 SELECT src1.key, src2.value FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) WHERE src1.ds = '2008-04-08' and src1.hr = '12' --- !query 9 schema +-- !query 4 schema struct --- !query 9 output +-- !query 4 output 165 val_165 165 val_165 165 val_165 @@ -206,16 +90,16 @@ struct 86 val_86 --- !query 10 +-- !query 5 FROM (SELECT src.* FROM src) x JOIN (SELECT src.* FROM src) Y ON (x.key = Y.key) select Y.key, Y.value --- !query 10 schema +-- !query 5 schema struct --- !query 10 output +-- !query 5 output 165 val_165 165 val_165 165 val_165 @@ -225,16 +109,16 @@ struct 86 val_86 --- !query 11 +-- !query 6 SELECT src1.c1, src2.c4 FROM (SELECT src.key as c1, src.value as c2 from src) src1 JOIN 
(SELECT src.key as c3, src.value as c4 from src) src2 ON src1.c1 = src2.c3 AND src1.c1 < 200 --- !query 11 schema +-- !query 6 schema struct --- !query 11 output +-- !query 6 output 165 val_165 165 val_165 165 val_165 @@ -242,7 +126,7 @@ struct 86 val_86 --- !query 12 +-- !query 7 SELECT src1.c1, src2.c4 FROM (SELECT src.key as c1, src.value as c2 from src) src1 @@ -252,13 +136,13 @@ FROM JOIN (SELECT src.key as c5, src.value as c6 from src) src3 ON src1.c1 = src3.c5 AND src3.c5 < 100 --- !query 12 schema +-- !query 7 schema struct --- !query 12 output +-- !query 7 output 86 val_86 --- !query 13 +-- !query 8 SELECT src1.c1, src2.c4 FROM (SELECT src.key as c1, src.value as c2 from src) src1 @@ -268,37 +152,37 @@ FROM JOIN (SELECT src.key as c5, src.value as c6 from src) src3 ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 400 --- !query 13 schema +-- !query 8 schema struct --- !query 13 output +-- !query 8 output 165 val_165 165 val_165 165 val_165 165 val_165 --- !query 14 +-- !query 9 FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 200 SELECT src.key, srcpart.value --- !query 14 schema +-- !query 9 schema struct --- !query 14 output +-- !query 9 output 251 val_251 251 val_251 330 val_330 330 val_330 --- !query 15 +-- !query 10 SELECT a.k1, a.v1, a.k2, a.v2 FROM ( SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 FROM src src1 JOIN src src2 ON (src1.key = src2.key) SORT BY k1, v1, k2, v2 ) a --- !query 15 schema +-- !query 10 schema struct --- !query 15 output +-- !query 10 output 165 val_165 165 val_165 165 val_165 165 val_165 165 val_165 165 val_165 @@ -308,28 +192,28 @@ struct 86 val_86 86 val_86 --- !query 16 +-- !query 11 SELECT subq.key, tab.value FROM (select a.key, a.value from src a where a.key > 100 ) subq JOIN src tab ON (subq.key = tab.key and subq.key > 150 and subq.value = tab.value) where tab.key < 200 --- !query 16 schema +-- !query 11 schema struct --- !query 16 output +-- !query 11 
output 165 val_165 165 val_165 165 val_165 165 val_165 --- !query 17 +-- !query 12 SELECT src1.*, src2.* FROM src src1 JOIN src src2 ON (src1.key = src2.key) --- !query 17 schema +-- !query 12 schema struct --- !query 17 output +-- !query 12 output 165 val_165 165 val_165 165 val_165 165 val_165 165 val_165 165 val_165 @@ -339,60 +223,13 @@ struct 86 val_86 86 val_86 --- !query 18 -SELECT a.key, a.value, b.key, b.value -FROM - ( - SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key - ) a - FULL OUTER JOIN - ( - SELECT src2.key as key, count(distinct(src2.value)) AS value - FROM src1 src2 group by src2.key - ) b -ON (a.key = b.key) --- !query 18 schema -struct --- !query 18 output -165 2 165 1 -251 1 NULL NULL -330 1 330 1 -86 1 86 1 -NULL NULL 201 0 -NULL NULL NULL 1 - - --- !query 19 -SELECT a.key, a.value, b.key, b.value1, b.value2 -FROM - ( - SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key - ) a - FULL OUTER JOIN - ( - SELECT src2.key as key, count(distinct(src2.value)) AS value1, - count(distinct(src2.key)) AS value2 - FROM src1 src2 group by src2.key - ) b -ON (a.key = b.key) --- !query 19 schema -struct --- !query 19 output -165 2 165 1 1 -251 1 NULL NULL NULL -330 1 330 1 1 -86 1 86 1 1 -NULL NULL 201 0 1 -NULL NULL NULL 1 0 - - --- !query 20 +-- !query 13 SELECT src1.key, src2.value FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') --- !query 20 schema +-- !query 13 schema struct --- !query 20 output +-- !query 13 output 165 val_165 165 val_165 165 val_165 @@ -423,66 +260,7 @@ struct 86 val_86 --- !query 21 -SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 -FROM ( - SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 - FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 200) - RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND 
src3.key < 300) - SORT BY k1,v1,k2,v2,k3,v3 -)a --- !query 21 schema -struct --- !query 21 output -165 val_165 165 val_165 165 val_165 -165 val_165 165 val_165 165 val_165 -165 val_165 165 val_165 165 val_165 -165 val_165 165 val_165 165 val_165 -165 val_165 165 val_165 165 val_165 -165 val_165 165 val_165 165 val_165 -165 val_165 165 val_165 165 val_165 -165 val_165 165 val_165 165 val_165 -86 val_86 86 val_86 86 val_86 -NULL NULL NULL NULL 251 val_251 -NULL NULL NULL NULL 330 val_330 - - --- !query 22 -SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 -FROM ( - SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 - FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key < 100) - RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 300) - SORT BY k1,v1,k2,v2,k3,v3 -)a --- !query 22 schema -struct --- !query 22 output -86 val_86 86 val_86 86 val_86 -NULL NULL NULL NULL 165 val_165 -NULL NULL NULL NULL 165 val_165 -NULL NULL NULL NULL 251 val_251 -NULL NULL NULL NULL 330 val_330 - - --- !query 23 -SELECT * -FROM - src src1 - LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) -SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value --- !query 23 schema -struct --- !query 23 output -NULL NULL NULL NULL 165 val_165 -NULL NULL NULL NULL 165 val_165 -NULL NULL NULL NULL 251 val_251 -NULL NULL NULL NULL 330 val_330 -NULL NULL NULL NULL 86 val_86 - - --- !query 24 +-- !query 14 SELECT src5.src1_value FROM (SELECT src3.*, src4.value as src4_value, src4.key as src4_key @@ -491,9 +269,9 @@ FROM FROM src src1 JOIN src src2 ON src1.key = src2.key) src3 ON src3.src1_key = src4.key) src5 --- !query 24 schema +-- !query 14 schema struct --- !query 24 output +-- !query 14 output val_165 val_165 val_165 @@ -507,13 +285,13 @@ val_330 val_86 --- !query 25 +-- !query 15 SELECT * 
FROM src src1 JOIN src src2 WHERE src1.key < 200 and src2.key < 200 SORT BY src1.key, src1.value, src2.key, src2.value --- !query 25 schema +-- !query 15 schema struct --- !query 25 output +-- !query 15 output 165 val_165 165 val_165 165 val_165 165 val_165 165 val_165 165 val_165 @@ -525,26 +303,26 @@ struct 86 val_86 86 val_86 --- !query 26 +-- !query 16 WITH tst1 AS (SELECT a.key, count(1) as cnt FROM src a group by a.key) SELECT sum(a.cnt) FROM tst1 a JOIN tst1 b ON a.key = b.key --- !query 26 schema +-- !query 16 schema struct --- !query 26 output +-- !query 16 output 5 --- !query 27 +-- !query 17 SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key order by x.key --- !query 27 schema +-- !query 17 schema struct --- !query 27 output +-- !query 17 output 86 1 165 4 330 1 --- !query 28 +-- !query 18 SELECT count(1) FROM ( @@ -557,202 +335,22 @@ JOIN SELECT src.key as k, src.value as v from src ) src3 ON src_12.key = src3.k AND src3.k < 300 --- !query 28 schema +-- !query 18 schema struct --- !query 28 output +-- !query 18 output 10 --- !query 29 -SELECT * FROM src src1 - LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) - SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value --- !query 29 schema -struct --- !query 29 output -NULL NULL NULL NULL 165 val_165 -NULL NULL NULL NULL 165 val_165 -NULL NULL NULL NULL 251 val_251 -NULL NULL NULL NULL 330 val_330 -NULL NULL NULL NULL 86 val_86 - - --- !query 30 -SELECT * FROM src src1 - RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) - SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value --- !query 30 schema -struct --- !query 30 output -NULL NULL 165 val_165 165 val_165 -NULL NULL 165 val_165 165 val_165 -NULL NULL 165 val_165 165 
val_165 -NULL NULL 165 val_165 165 val_165 -NULL NULL 86 val_86 86 val_86 -NULL NULL NULL NULL 251 val_251 -NULL NULL NULL NULL 330 val_330 - - --- !query 31 -SELECT * FROM src src1 - LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) - SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value --- !query 31 schema -struct --- !query 31 output -165 val_165 NULL NULL NULL NULL -165 val_165 NULL NULL NULL NULL -251 val_251 NULL NULL NULL NULL -330 val_330 NULL NULL NULL NULL -86 val_86 NULL NULL NULL NULL - - --- !query 32 -SELECT * FROM src src1 - RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) - SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value --- !query 32 schema -struct --- !query 32 output -NULL NULL 165 val_165 165 val_165 -NULL NULL 165 val_165 165 val_165 -NULL NULL 165 val_165 165 val_165 -NULL NULL 165 val_165 165 val_165 -NULL NULL 251 val_251 NULL NULL -NULL NULL 330 val_330 NULL NULL -NULL NULL 86 val_86 86 val_86 - - --- !query 33 -SELECT * FROM src src1 - JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) - SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value --- !query 33 schema -struct --- !query 33 output - - - --- !query 34 -SELECT * FROM src src1 - JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) - SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value --- !query 34 schema -struct --- !query 34 output -NULL NULL NULL NULL 165 val_165 -NULL NULL NULL NULL 165 val_165 -NULL NULL NULL NULL 251 val_251 -NULL NULL NULL NULL 330 val_330 -NULL NULL NULL NULL 86 val_86 - 
- --- !query 35 -SELECT * FROM src src1 - LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) - SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value --- !query 35 schema -struct --- !query 35 output - - - --- !query 36 -SELECT * FROM src src1 - RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) - SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value --- !query 36 schema -struct --- !query 36 output -NULL NULL 165 val_165 165 val_165 -NULL NULL 165 val_165 165 val_165 -NULL NULL 165 val_165 165 val_165 -NULL NULL 165 val_165 165 val_165 -NULL NULL 86 val_86 86 val_86 - - --- !query 37 -FROM -(SELECT src.* FROM src sort by key) x -JOIN -(SELECT src.* FROM src sort by value) Y -ON (x.key = Y.key) -select Y.key,Y.value --- !query 37 schema -struct --- !query 37 output -165 val_165 -165 val_165 -165 val_165 -165 val_165 -251 val_251 -330 val_330 -86 val_86 - - --- !query 38 -FROM -(SELECT src.* FROM src sort by key) x -LEFT OUTER JOIN -(SELECT src.* FROM src sort by value) Y -ON (x.key = Y.key) -select Y.key,Y.value --- !query 38 schema -struct --- !query 38 output -165 val_165 -165 val_165 -165 val_165 -165 val_165 -251 val_251 -330 val_330 -86 val_86 - - --- !query 39 -FROM -(SELECT src.* FROM src sort by key) x -RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Y -ON (x.key = Y.key) -select Y.key,Y.value --- !query 39 schema -struct --- !query 39 output -165 val_165 -165 val_165 -165 val_165 -165 val_165 -251 val_251 -330 val_330 -86 val_86 - - --- !query 40 +-- !query 19 FROM (SELECT src.* FROM src sort by key) x JOIN (SELECT src.* FROM src sort by value) Y ON (x.key = Y.key) -JOIN -(SELECT src.* FROM src sort by value) Z -ON (x.key = Z.key) select Y.key,Y.value --- !query 40 schema +-- !query 19 schema struct --- !query 40 output 
-165 val_165 -165 val_165 -165 val_165 -165 val_165 +-- !query 19 output 165 val_165 165 val_165 165 val_165 @@ -762,123 +360,19 @@ struct 86 val_86 --- !query 41 +-- !query 20 FROM (SELECT src.* FROM src sort by key) x JOIN (SELECT src.* FROM src sort by value) Y ON (x.key = Y.key) -LEFT OUTER JOIN -(SELECT src.* FROM src sort by value) Z -ON (x.key = Z.key) -select Y.key,Y.value --- !query 41 schema -struct --- !query 41 output -165 val_165 -165 val_165 -165 val_165 -165 val_165 -165 val_165 -165 val_165 -165 val_165 -165 val_165 -251 val_251 -330 val_330 -86 val_86 - - --- !query 42 -FROM -(SELECT src.* FROM src sort by key) x -LEFT OUTER JOIN -(SELECT src.* FROM src sort by value) Y -ON (x.key = Y.key) -LEFT OUTER JOIN -(SELECT src.* FROM src sort by value) Z -ON (x.key = Z.key) -select Y.key,Y.value --- !query 42 schema -struct --- !query 42 output -165 val_165 -165 val_165 -165 val_165 -165 val_165 -165 val_165 -165 val_165 -165 val_165 -165 val_165 -251 val_251 -330 val_330 -86 val_86 - - --- !query 43 -FROM -(SELECT src.* FROM src sort by key) x -LEFT OUTER JOIN -(SELECT src.* FROM src sort by value) Y -ON (x.key = Y.key) -RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Z -ON (x.key = Z.key) -select Y.key,Y.value --- !query 43 schema -struct --- !query 43 output -165 val_165 -165 val_165 -165 val_165 -165 val_165 -165 val_165 -165 val_165 -165 val_165 -165 val_165 -251 val_251 -330 val_330 -86 val_86 - - --- !query 44 -FROM -(SELECT src.* FROM src sort by key) x -RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Y -ON (x.key = Y.key) -RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Z -ON (x.key = Z.key) -select Y.key,Y.value --- !query 44 schema -struct --- !query 44 output -165 val_165 -165 val_165 -165 val_165 -165 val_165 -165 val_165 -165 val_165 -165 val_165 -165 val_165 -251 val_251 -330 val_330 -86 val_86 - - --- !query 45 -FROM -(SELECT src.* FROM src sort by key) x -RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Y -ON 
(x.key = Y.key) JOIN (SELECT src.* FROM src sort by value) Z ON (x.key = Z.key) select Y.key,Y.value --- !query 45 schema +-- !query 20 schema struct --- !query 45 output +-- !query 20 output 165 val_165 165 val_165 165 val_165 @@ -892,7 +386,7 @@ struct 86 val_86 --- !query 46 +-- !query 21 SELECT x.key, x.value, subq1.value FROM ( SELECT x.key as key, x.value as value from src x where x.key < 200 @@ -900,9 +394,9 @@ FROM SELECT x1.key as key, x1.value as value from src x1 where x1.key > 100 ) subq1 JOIN src1 x ON (x.key = subq1.key) --- !query 46 schema +-- !query 21 schema struct --- !query 46 output +-- !query 21 output 165 NULL val_165 165 NULL val_165 165 NULL val_165 @@ -915,7 +409,7 @@ struct 86 val_86 val_86 --- !query 47 +-- !query 22 SELECT x.key, x.value, subq1.cnt FROM ( SELECT x.key as key, count(1) as cnt from src x where x.key < 200 group by x.key @@ -923,9 +417,9 @@ FROM SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key ) subq1 JOIN src1 x ON (x.key = subq1.key) --- !query 47 schema +-- !query 22 schema struct --- !query 47 output +-- !query 22 output 165 NULL 2 165 NULL 2 165 val_165 2 @@ -934,167 +428,96 @@ struct 86 val_86 1 --- !query 48 +-- !query 23 SELECT x.key, COUNT(*) FROM src x JOIN src y ON x.key = y.key GROUP BY x.key --- !query 48 schema +-- !query 23 schema struct --- !query 48 output +-- !query 23 output 165 4 251 1 330 1 86 1 --- !query 49 -SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.a >= y.a + 2 --- !query 49 schema -struct --- !query 49 output -3 1 -3 2 - - --- !query 50 -SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.b = y.b and x.a >= y.a + 2 --- !query 50 schema -struct --- !query 50 output -3 1 -3 2 - - --- !query 51 -SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.b = y.a and x.a >= y.b + 1 --- !query 51 schema -struct --- !query 51 output -2 1 -2 2 -3 1 -3 2 - - --- !query 52 +-- !query 24 SELECT * FROM uppercasedata u JOIN lowercasedata l WHERE u.n = l.N 
--- !query 52 schema +-- !query 24 schema struct --- !query 52 output +-- !query 24 output 1 A 1 a 2 B 2 b 3 C 3 c 4 D 4 d --- !query 53 +-- !query 25 SELECT * FROM uppercasedata u JOIN lowercasedata l ON u.n = l.N --- !query 53 schema +-- !query 25 schema struct --- !query 53 output +-- !query 25 output 1 A 1 a 2 B 2 b 3 C 3 c 4 D 4 d --- !query 54 +-- !query 26 SELECT * FROM (SELECT * FROM testdata2 WHERE a = 1) x JOIN (SELECT * FROM testdata2 WHERE a = 1) y WHERE x.a = y.a --- !query 54 schema +-- !query 26 schema struct --- !query 54 output +-- !query 26 output 1 1 1 1 1 1 1 2 1 2 1 1 1 2 1 2 --- !query 55 +-- !query 27 SELECT * FROM (SELECT * FROM testData2 WHERE a = 1) x JOIN (SELECT * FROM testData2 WHERE a = 2) y WHERE x.a = y.a --- !query 55 schema +-- !query 27 schema struct --- !query 55 output - - - --- !query 56 -SELECT * FROM - (SELECT * FROM upperCaseData WHERE N <= 4) leftTable FULL OUTER JOIN - (SELECT * FROM upperCaseData WHERE N >= 3) rightTable - ON leftTable.N = rightTable.N --- !query 56 schema -struct --- !query 56 output -1 A NULL NULL -2 B NULL NULL -3 C 3 C -4 D 4 D -NULL NULL 5 E -NULL NULL 6 F - +-- !query 27 output --- !query 57 -SELECT * FROM lowercasedata l RIGHT OUTER JOIN uppercasedata u ON l.n = u.N --- !query 57 schema -struct --- !query 57 output -1 a 1 A -2 b 2 B -3 c 3 C -4 d 4 D -NULL NULL 5 E -NULL NULL 6 F --- !query 58 -SELECT * FROM uppercasedata u LEFT OUTER JOIN lowercasedata l ON l.n = u.N --- !query 58 schema -struct --- !query 58 output -1 A 1 a -2 B 2 b -3 C 3 c -4 D 4 d -5 E NULL NULL -6 F NULL NULL - - --- !query 59 +-- !query 28 SELECT * FROM upperCaseData JOIN lowerCaseData ON lowerCaseData.n = upperCaseData.N --- !query 59 schema +-- !query 28 schema struct --- !query 59 output +-- !query 28 output 1 A 1 a 2 B 2 b 3 C 3 c 4 D 4 d --- !query 60 +-- !query 29 SELECT upperCaseData.N, upperCaseData.L FROM upperCaseData JOIN lowerCaseData ON lowerCaseData.n = upperCaseData.N --- !query 60 schema +-- !query 29 schema 
struct --- !query 60 output +-- !query 29 output 1 A 2 B 3 C 4 D --- !query 61 +-- !query 30 SELECT a.key, b.key, c.key FROM testData a,testData b,testData c where a.key = b.key and a.key = c.key and a.key < 5 --- !query 61 schema +-- !query 30 schema struct --- !query 61 output +-- !query 30 output 1 1 1 2 2 2 3 3 3 diff --git a/sql/core/src/test/resources/sql-tests/results/left-semi-join.sql.out b/sql/core/src/test/resources/sql-tests/results/left-semi-join.sql.out new file mode 100644 index 0000000000000..99c0a8586b31e --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/left-semi-join.sql.out @@ -0,0 +1,31 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 3 + + +-- !query 0 +SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.a >= y.a + 2 +-- !query 0 schema +struct +-- !query 0 output +3 1 +3 2 + + +-- !query 1 +SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.b = y.b and x.a >= y.a + 2 +-- !query 1 schema +struct +-- !query 1 output +3 1 +3 2 + + +-- !query 2 +SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.b = y.a and x.a >= y.b + 1 +-- !query 2 schema +struct +-- !query 2 output +2 1 +2 2 +3 1 +3 2 diff --git a/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out b/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out new file mode 100644 index 0000000000000..4bb6f56e58cc7 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out @@ -0,0 +1,550 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 28 + + +-- !query 0 +SELECT * FROM + (SELECT * FROM upperCaseData WHERE N <= 4) leftTable FULL OUTER JOIN + (SELECT * FROM upperCaseData WHERE N >= 3) rightTable + ON leftTable.N = rightTable.N +-- !query 0 schema +struct +-- !query 0 output +1 A NULL NULL +2 B NULL NULL +3 C 3 C +4 D 4 D +NULL NULL 5 E +NULL NULL 6 F + + +-- !query 1 +SELECT * FROM lowercasedata l RIGHT OUTER JOIN uppercasedata u ON l.n = u.N +-- !query 1 schema 
+struct +-- !query 1 output +1 a 1 A +2 b 2 B +3 c 3 C +4 d 4 D +NULL NULL 5 E +NULL NULL 6 F + + +-- !query 2 +SELECT * FROM uppercasedata u LEFT OUTER JOIN lowercasedata l ON l.n = u.N +-- !query 2 schema +struct +-- !query 2 output +1 A 1 a +2 B 2 b +3 C 3 c +4 D 4 d +5 E NULL NULL +6 F NULL NULL + + +-- !query 3 +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + ) a + LEFT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c +-- !query 3 schema +struct +-- !query 3 output +165 val_165 NULL NULL +165 val_165 NULL NULL +251 val_251 251 val_251 + + +-- !query 4 +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + ) a + RIGHT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c +-- !query 4 schema +struct +-- !query 4 output +251 val_251 251 val_251 +NULL NULL 330 val_330 + + +-- !query 5 +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + ) a + FULL OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c +-- !query 5 schema +struct +-- !query 5 output +165 val_165 NULL NULL +165 val_165 NULL NULL +251 val_251 251 val_251 +NULL NULL 330 val_330 + + +-- !query 6 +SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE 
src1.key > 10 and src1.key < 150 + ) a + FULL OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 150 and src2.key < 300 + ) b + ON (a.c1 = b.c3) + LEFT OUTER JOIN + ( + SELECT src3.key AS c5, src3.value AS c6 FROM src src3 WHERE src3.key > 200 and src3.key < 400 + ) c + ON (a.c1 = c.c5) +) c +-- !query 6 schema +struct +-- !query 6 output +86 val_86 NULL NULL NULL NULL +NULL NULL 165 val_165 NULL NULL +NULL NULL 165 val_165 NULL NULL +NULL NULL 251 val_251 NULL NULL + + +-- !query 7 +SELECT c.c1, c.c2, c.c3, c.c4 +FROM ( + SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 + FROM + ( + SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + ) a + LEFT OUTER JOIN + ( + SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + ) b + ON (a.c1 = b.c3) +) c +where c.c3 IS NULL AND c.c1 IS NOT NULL +-- !query 7 schema +struct +-- !query 7 output +165 val_165 NULL NULL +165 val_165 NULL NULL + + +-- !query 8 +SELECT a.key, a.value, b.key, b.value +FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value + FROM src1 src2 group by src2.key + ) b +ON (a.key = b.key) +-- !query 8 schema +struct +-- !query 8 output +165 2 165 1 +251 1 NULL NULL +330 1 330 1 +86 1 86 1 +NULL NULL 201 0 +NULL NULL NULL 1 + + +-- !query 9 +SELECT a.key, a.value, b.key, b.value1, b.value2 +FROM + ( + SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + ) a + FULL OUTER JOIN + ( + SELECT src2.key as key, count(distinct(src2.value)) AS value1, + count(distinct(src2.key)) AS value2 + FROM src1 src2 group by src2.key + ) b +ON (a.key = b.key) +-- !query 9 schema +struct +-- !query 9 output +165 2 165 1 1 +251 1 NULL NULL NULL +330 1 330 1 1 +86 1 86 1 1 +NULL NULL 201 0 1 +NULL NULL NULL 1 0 + + +-- !query 10 +SELECT 
a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 +FROM ( + SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 + FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 200) + RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 300) + SORT BY k1,v1,k2,v2,k3,v3 +)a +-- !query 10 schema +struct +-- !query 10 output +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +165 val_165 165 val_165 165 val_165 +86 val_86 86 val_86 86 val_86 +NULL NULL NULL NULL 251 val_251 +NULL NULL NULL NULL 330 val_330 + + +-- !query 11 +SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 +FROM ( + SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 + FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key < 100) + RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 300) + SORT BY k1,v1,k2,v2,k3,v3 +)a +-- !query 11 schema +struct +-- !query 11 output +86 val_86 86 val_86 86 val_86 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 251 val_251 +NULL NULL NULL NULL 330 val_330 + + +-- !query 12 +SELECT * +FROM + src src1 + LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) +SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 12 schema +struct +-- !query 12 output +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 251 val_251 +NULL NULL NULL NULL 330 val_330 +NULL NULL NULL NULL 86 val_86 + + +-- !query 13 +SELECT * FROM src src1 + LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN 
src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 13 schema +struct +-- !query 13 output +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 251 val_251 +NULL NULL NULL NULL 330 val_330 +NULL NULL NULL NULL 86 val_86 + + +-- !query 14 +SELECT * FROM src src1 + RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 14 schema +struct +-- !query 14 output +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 86 val_86 86 val_86 +NULL NULL NULL NULL 251 val_251 +NULL NULL NULL NULL 330 val_330 + + +-- !query 15 +SELECT * FROM src src1 + LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 15 schema +struct +-- !query 15 output +165 val_165 NULL NULL NULL NULL +165 val_165 NULL NULL NULL NULL +251 val_251 NULL NULL NULL NULL +330 val_330 NULL NULL NULL NULL +86 val_86 NULL NULL NULL NULL + + +-- !query 16 +SELECT * FROM src src1 + RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 16 schema +struct +-- !query 16 output +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 251 val_251 NULL NULL +NULL NULL 330 val_330 NULL NULL +NULL NULL 86 val_86 86 val_86 + + +-- !query 17 +SELECT * FROM 
src src1 + JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 17 schema +struct +-- !query 17 output + + + +-- !query 18 +SELECT * FROM src src1 + JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 18 schema +struct +-- !query 18 output +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 165 val_165 +NULL NULL NULL NULL 251 val_251 +NULL NULL NULL NULL 330 val_330 +NULL NULL NULL NULL 86 val_86 + + +-- !query 19 +SELECT * FROM src src1 + LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 19 schema +struct +-- !query 19 output + + + +-- !query 20 +SELECT * FROM src src1 + RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value +-- !query 20 schema +struct +-- !query 20 output +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 165 val_165 165 val_165 +NULL NULL 86 val_86 86 val_86 + + +-- !query 21 +FROM +(SELECT src.* FROM src sort by key) x +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +select Y.key,Y.value +-- !query 21 schema +struct +-- !query 21 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 + + +-- !query 22 +FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y 
+ON (x.key = Y.key) +select Y.key,Y.value +-- !query 22 schema +struct +-- !query 22 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 + + +-- !query 23 +FROM +(SELECT src.* FROM src sort by key) x +JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value +-- !query 23 schema +struct +-- !query 23 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 + + +-- !query 24 +FROM +(SELECT src.* FROM src sort by key) x +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value +-- !query 24 schema +struct +-- !query 24 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 + + +-- !query 25 +FROM +(SELECT src.* FROM src sort by key) x +LEFT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value +-- !query 25 schema +struct +-- !query 25 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 + + +-- !query 26 +FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value +-- !query 26 schema +struct +-- !query 26 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 + + +-- !query 27 +FROM +(SELECT src.* FROM src sort by key) x +RIGHT OUTER JOIN +(SELECT src.* 
FROM src sort by value) Y +ON (x.key = Y.key) +JOIN +(SELECT src.* FROM src sort by value) Z +ON (x.key = Z.key) +select Y.key,Y.value +-- !query 27 schema +struct +-- !query 27 output +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +165 val_165 +251 val_251 +330 val_330 +86 val_86 diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 25dbee747f4a1..dfef9b33306eb 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -280,8 +280,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { |as src1(key, value) """.stripMargin) - session.sql("select * from src1").show(false) - Seq((251, "val_251", "2008-04-08", "11"), (251, "val_251", "2008-04-09", "11"), (251, "val_251", "2008-04-08", "12"), From a059c773bd16afe47fca142d923337d8c96a0e59 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Wed, 17 Aug 2016 16:59:39 -0700 Subject: [PATCH 17/27] more test cases --- .../test/resources/sql-tests/inputs/join.sql | 21 +++ .../resources/sql-tests/results/join.sql.out | 154 +++++++++++++++++- .../apache/spark/sql/DataFrameJoinSuite.scala | 40 +++++ .../org/apache/spark/sql/SQLQuerySuite.scala | 96 ----------- 4 files changed, 214 insertions(+), 97 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/join.sql b/sql/core/src/test/resources/sql-tests/inputs/join.sql index 8f4fff2ca534c..163fbbdde16a9 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/join.sql @@ -202,3 +202,24 @@ SELECT upperCaseData.N, upperCaseData.L FROM upperCaseData JOIN lowerCaseData SELECT a.key, b.key, c.key FROM testData a,testData b,testData c where a.key = b.key and a.key = c.key and a.key < 5; + +-- big inner join, 4 matches per row +SELECT x.key, x.value, y.key, 
y.value, count(1) FROM + (SELECT * FROM testData UNION ALL + SELECT * FROM testData UNION ALL + SELECT * FROM testData UNION ALL + SELECT * FROM testData) x JOIN + (SELECT * FROM testData UNION ALL + SELECT * FROM testData UNION ALL + SELECT * FROM testData UNION ALL + SELECT * FROM testData) y +WHERE x.key = y.key group by x.key, x.value, y.key, y.value; + +-- mixed-case keywords +SeleCT * from + (select * from upperCaseData WherE N <= 4) leftTable fuLL OUtER joiN + (sElEcT * FROM upperCaseData whERe N >= 3) rightTable + oN leftTable.N = rightTable.N; + +-- Supporting relational operator '<=>' in Spark SQL +SELECT * FROM src1 as a JOIN src1 as b on a.value <=> b.value; diff --git a/sql/core/src/test/resources/sql-tests/results/join.sql.out b/sql/core/src/test/resources/sql-tests/results/join.sql.out index 0764862925f9f..edfe224099c8d 100644 --- a/sql/core/src/test/resources/sql-tests/results/join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/join.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 31 +-- Number of queries: 34 -- !query 0 @@ -522,3 +522,155 @@ struct 2 2 2 3 3 3 4 4 4 + + +-- !query 31 +SELECT x.key, x.value, y.key, y.value, count(1) FROM + (SELECT * FROM testData UNION ALL + SELECT * FROM testData UNION ALL + SELECT * FROM testData UNION ALL + SELECT * FROM testData) x JOIN + (SELECT * FROM testData UNION ALL + SELECT * FROM testData UNION ALL + SELECT * FROM testData UNION ALL + SELECT * FROM testData) y +WHERE x.key = y.key group by x.key, x.value, y.key, y.value +-- !query 31 schema +struct +-- !query 31 output +1 1 1 1 16 +10 10 10 10 16 +100 100 100 100 16 +11 11 11 11 16 +12 12 12 12 16 +13 13 13 13 16 +14 14 14 14 16 +15 15 15 15 16 +16 16 16 16 16 +17 17 17 17 16 +18 18 18 18 16 +19 19 19 19 16 +2 2 2 2 16 +20 20 20 20 16 +21 21 21 21 16 +22 22 22 22 16 +23 23 23 23 16 +24 24 24 24 16 +25 25 25 25 16 +26 26 26 26 16 +27 27 27 27 16 +28 28 28 28 16 +29 29 29 29 16 +3 3 3 3 16 +30 30 
30 30 16 +31 31 31 31 16 +32 32 32 32 16 +33 33 33 33 16 +34 34 34 34 16 +35 35 35 35 16 +36 36 36 36 16 +37 37 37 37 16 +38 38 38 38 16 +39 39 39 39 16 +4 4 4 4 16 +40 40 40 40 16 +41 41 41 41 16 +42 42 42 42 16 +43 43 43 43 16 +44 44 44 44 16 +45 45 45 45 16 +46 46 46 46 16 +47 47 47 47 16 +48 48 48 48 16 +49 49 49 49 16 +5 5 5 5 16 +50 50 50 50 16 +51 51 51 51 16 +52 52 52 52 16 +53 53 53 53 16 +54 54 54 54 16 +55 55 55 55 16 +56 56 56 56 16 +57 57 57 57 16 +58 58 58 58 16 +59 59 59 59 16 +6 6 6 6 16 +60 60 60 60 16 +61 61 61 61 16 +62 62 62 62 16 +63 63 63 63 16 +64 64 64 64 16 +65 65 65 65 16 +66 66 66 66 16 +67 67 67 67 16 +68 68 68 68 16 +69 69 69 69 16 +7 7 7 7 16 +70 70 70 70 16 +71 71 71 71 16 +72 72 72 72 16 +73 73 73 73 16 +74 74 74 74 16 +75 75 75 75 16 +76 76 76 76 16 +77 77 77 77 16 +78 78 78 78 16 +79 79 79 79 16 +8 8 8 8 16 +80 80 80 80 16 +81 81 81 81 16 +82 82 82 82 16 +83 83 83 83 16 +84 84 84 84 16 +85 85 85 85 16 +86 86 86 86 16 +87 87 87 87 16 +88 88 88 88 16 +89 89 89 89 16 +9 9 9 9 16 +90 90 90 90 16 +91 91 91 91 16 +92 92 92 92 16 +93 93 93 93 16 +94 94 94 94 16 +95 95 95 95 16 +96 96 96 96 16 +97 97 97 97 16 +98 98 98 98 16 +99 99 99 99 16 + + +-- !query 32 +SeleCT * from + (select * from upperCaseData WherE N <= 4) leftTable fuLL OUtER joiN + (sElEcT * FROM upperCaseData whERe N >= 3) rightTable + oN leftTable.N = rightTable.N +-- !query 32 schema +struct +-- !query 32 output +1 A NULL NULL +2 B NULL NULL +3 C 3 C +4 D 4 D +NULL NULL 5 E +NULL NULL 6 F + + +-- !query 33 +SELECT * FROM src1 as a JOIN src1 as b on a.value <=> b.value +-- !query 33 schema +struct +-- !query 33 output +165 NULL 165 NULL +165 NULL 201 NULL +165 NULL NULL NULL +165 val_165 165 val_165 +201 NULL 165 NULL +201 NULL 201 NULL +201 NULL NULL NULL +330 val_330 330 val_330 +86 val_86 86 val_86 +NULL NULL 165 NULL +NULL NULL 201 NULL +NULL NULL NULL NULL +NULL val_null NULL val_null diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala index 4342c039aefc8..4c3c0022fe887 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala @@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.plans.{Inner, LeftOuter, RightOuter} import org.apache.spark.sql.catalyst.plans.logical.Join import org.apache.spark.sql.execution.joins.BroadcastHashJoinExec import org.apache.spark.sql.functions._ +import org.apache.spark.sql.internal.SQLConf import org.apache.spark.sql.test.SharedSQLContext class DataFrameJoinSuite extends QueryTest with SharedSQLContext { @@ -225,4 +226,43 @@ class DataFrameJoinSuite extends QueryTest with SharedSQLContext { Row(1, null) :: Row(null, 2) :: Nil ) } + + test("cartesian product join") { + withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "true") { + checkAnswer( + testData3.join(testData3), + Row(1, null, 1, null) :: + Row(1, null, 2, 2) :: + Row(2, 2, 1, null) :: + Row(2, 2, 2, 2) :: Nil) + } + } + + + test("SortMergeJoin returns wrong results when using UnsafeRows") { + // This test is for the fix of https://issues.apache.org/jira/browse/SPARK-10737. + // This bug will be triggered when Tungsten is enabled and there are multiple + // SortMergeJoin operators executed in the same task. 
+ val confs = SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "1" :: Nil + withSQLConf(confs: _*) { + val df1 = (1 to 50).map(i => (s"str_$i", i)).toDF("i", "j") + val df2 = + df1 + .join(df1.select(df1("i")), "i") + .select(df1("i"), df1("j")) + + val df3 = df2.withColumnRenamed("i", "i1").withColumnRenamed("j", "j1") + val df4 = + df2 + .join(df3, df2("i") === df3("i1")) + .withColumn("diff", $"j" - $"j1") + .select(df2("i"), df2("j"), $"diff") + + checkAnswer( + df4, + df1.withColumn("diff", lit(0))) + } + } + + } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 1705989216957..12424fc22cf1e 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -22,9 +22,6 @@ import java.math.MathContext import java.sql.{Date, Timestamp} import org.apache.spark.{AccumulatorSuite, SparkException} -import org.apache.spark.sql.catalyst.analysis.UnresolvedException -import org.apache.spark.sql.catalyst.expressions.SortOrder -import org.apache.spark.sql.catalyst.plans.logical.Aggregate import org.apache.spark.sql.catalyst.util.StringUtils import org.apache.spark.sql.execution.aggregate import org.apache.spark.sql.execution.joins.{BroadcastHashJoinExec, CartesianProductExec, SortMergeJoinExec} @@ -126,16 +123,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } } - test("support table.star") { - checkAnswer( - sql( - """ - |SELECT r.* - |FROM testData l join testData2 r on (l.key = r.a) - """.stripMargin), - Row(1, 1) :: Row(1, 2) :: Row(2, 1) :: Row(2, 2) :: Row(3, 1) :: Row(3, 2) :: Nil) - } - test("self join with alias in agg") { Seq(1, 2, 3) .map(i => (i, i.toString)) @@ -636,35 +623,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { } } - test("big inner join, 4 matches per row") { - checkAnswer( - sql( - """ - |SELECT * FROM - | (SELECT * FROM testData 
UNION ALL - | SELECT * FROM testData UNION ALL - | SELECT * FROM testData UNION ALL - | SELECT * FROM testData) x JOIN - | (SELECT * FROM testData UNION ALL - | SELECT * FROM testData UNION ALL - | SELECT * FROM testData UNION ALL - | SELECT * FROM testData) y - |WHERE x.key = y.key""".stripMargin), - testData.rdd.flatMap( - row => Seq.fill(16)(Row.merge(row, row))).collect().toSeq) - } - - test("cartesian product join") { - withSQLConf(SQLConf.CROSS_JOINS_ENABLED.key -> "true") { - checkAnswer( - testData3.join(testData3), - Row(1, null, 1, null) :: - Row(1, null, 2, 2) :: - Row(2, 2, 1, null) :: - Row(2, 2, 2, 2) :: Nil) - } - } - test("SPARK-11111 null-safe join should not use cartesian product") { val df = sql("select count(*) from testData a join testData b on (a.key <=> b.key)") val cp = df.queryExecution.sparkPlan.collect { @@ -696,23 +654,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { Row(2, "b", 2) :: Nil) } - test("mixed-case keywords") { - checkAnswer( - sql( - """ - |SeleCT * from - | (select * from upperCaseData WherE N <= 4) leftTable fuLL OUtER joiN - | (sElEcT * FROM upperCaseData whERe N >= 3) rightTable - | oN leftTable.N = rightTable.N - """.stripMargin), - Row(1, "A", null, null) :: - Row(2, "B", null, null) :: - Row(3, "C", 3, "C") :: - Row(4, "D", 4, "D") :: - Row(null, null, 5, "E") :: - Row(null, null, 6, "F") :: Nil) - } - test("select with table name as qualifier") { checkAnswer( sql("SELECT testData.value FROM testData WHERE testData.key = 1"), @@ -1112,18 +1053,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { ) } - test("Supporting relational operator '<=>' in Spark SQL") { - val nullCheckData1 = TestData(1, "1") :: TestData(2, null) :: Nil - val rdd1 = sparkContext.parallelize((0 to 1).map(i => nullCheckData1(i))) - rdd1.toDF().createOrReplaceTempView("nulldata1") - val nullCheckData2 = TestData(1, "1") :: TestData(2, null) :: Nil - val rdd2 = sparkContext.parallelize((0 to 1).map(i => 
nullCheckData2(i))) - rdd2.toDF().createOrReplaceTempView("nulldata2") - checkAnswer(sql("SELECT nulldata1.key FROM nulldata1 join " + - "nulldata2 on nulldata1.value <=> nulldata2.value"), - (1 to 2).map(i => Row(i))) - } - test("Multi-column COUNT(DISTINCT ...)") { val data = TestData(1, "val_1") :: TestData(2, "val_2") :: Nil val rdd = sparkContext.parallelize((0 to 1).map(i => data(i))) @@ -1525,31 +1454,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { "org.apache.spark.sql.execution.datasources.jdbc")) } - test("SortMergeJoin returns wrong results when using UnsafeRows") { - // This test is for the fix of https://issues.apache.org/jira/browse/SPARK-10737. - // This bug will be triggered when Tungsten is enabled and there are multiple - // SortMergeJoin operators executed in the same task. - val confs = SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "1" :: Nil - withSQLConf(confs: _*) { - val df1 = (1 to 50).map(i => (s"str_$i", i)).toDF("i", "j") - val df2 = - df1 - .join(df1.select(df1("i")), "i") - .select(df1("i"), df1("j")) - - val df3 = df2.withColumnRenamed("i", "i1").withColumnRenamed("j", "j1") - val df4 = - df2 - .join(df3, df2("i") === df3("i1")) - .withColumn("diff", $"j" - $"j1") - .select(df2("i"), df2("j"), $"diff") - - checkAnswer( - df4, - df1.withColumn("diff", lit(0))) - } - } - test("SPARK-11303: filter should not be pushed down into sample") { val df = spark.range(100) List(true, false).foreach { withReplacement => From cf9a233622e88ba8fd34a4e2c2fd98aa4c7340e9 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Wed, 17 Aug 2016 17:57:41 -0700 Subject: [PATCH 18/27] code clean --- sql/core/src/test/resources/sql-tests/inputs/using-join.sql | 2 +- .../test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala | 3 --- .../test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 4 ---- 3 files changed, 1 insertion(+), 8 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/using-join.sql 
b/sql/core/src/test/resources/sql-tests/inputs/using-join.sql index 67284c20ea6e8..470c74d72ab99 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/using-join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/using-join.sql @@ -40,4 +40,4 @@ SELECT * FROM ut1 join ut1 using (c1); -- clean up the temporary tables drop view ut1; drop view ut2; -drop view ut3; \ No newline at end of file +drop view ut3; diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala index 4c3c0022fe887..69404d55b390c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala @@ -238,7 +238,6 @@ class DataFrameJoinSuite extends QueryTest with SharedSQLContext { } } - test("SortMergeJoin returns wrong results when using UnsafeRows") { // This test is for the fix of https://issues.apache.org/jira/browse/SPARK-10737. // This bug will be triggered when Tungsten is enabled and there are multiple @@ -263,6 +262,4 @@ class DataFrameJoinSuite extends QueryTest with SharedSQLContext { df1.withColumn("diff", lit(0))) } } - - } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index dfef9b33306eb..c41f5ecd3821f 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -229,10 +229,6 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { files ++ dirs.flatMap(listFilesRecursively) } - private def getFilePath(path: String): String = { - Thread.currentThread().getContextClassLoader.getResource(path).toString - } - /** Load built-in test tables into the SparkSession. 
*/ private def loadTestData(session: SparkSession): Unit = { import session.implicits._ From b4801e005cd03d2a1e64347b345e55f481bcda7e Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Wed, 17 Aug 2016 17:58:22 -0700 Subject: [PATCH 19/27] code clean --- sql/core/src/test/resources/sql-tests/inputs/left-semi-join.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/left-semi-join.sql b/sql/core/src/test/resources/sql-tests/inputs/left-semi-join.sql index 19a6bfb9e7753..adbfc859aa20f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/left-semi-join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/left-semi-join.sql @@ -5,4 +5,4 @@ SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.a >= y.a + 2; SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.b = y.b and x.a >= y.a + 2; -- left semi greater than predicate and equal operator #2 -SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.b = y.a and x.a >= y.b + 1; \ No newline at end of file +SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.b = y.a and x.a >= y.b + 1; From bf556240e0f01cdd12f53a9407d8811ec30380d4 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Fri, 19 Aug 2016 21:02:24 -0700 Subject: [PATCH 20/27] update based on the lastest fix. 
--- .../scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index b0c5302f3c14e..61360dbcae116 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -276,13 +276,13 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { session.sql( """ |CREATE OR REPLACE TEMPORARY VIEW src1 AS SELECT * FROM VALUES - |(201, CAST(null as String)), + |(201, null), |(86, "val_86"), - |(CAST(null as int), "val_null"), + |(null, "val_null"), |(165, "val_165"), - |(CAST(null as int), CAST(null as String)), + |(null, null), |(330, "val_330"), - |(165, CAST(null as String)) + |(165, null) |as src1(key, value) """.stripMargin) From 42b3c69368beeb63848ddba91a0fd21898e78918 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sun, 21 Aug 2016 19:51:04 -0700 Subject: [PATCH 21/27] remove the comments (xyz.q) --- .../test/resources/sql-tests/inputs/join.sql | 46 ++++++++--------- .../resources/sql-tests/inputs/outer-join.sql | 50 +++++++++---------- 2 files changed, 48 insertions(+), 48 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/join.sql b/sql/core/src/test/resources/sql-tests/inputs/join.sql index 163fbbdde16a9..5545b888315eb 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/join.sql @@ -1,4 +1,4 @@ --- join nested table expressions (auto_join0.q) +-- join nested table expressions SELECT a.k1, a.v1, a.k2, a.v2 FROM ( SELECT src1.key as k1, src1.value as v1, @@ -9,26 +9,26 @@ SELECT src1.key as k1, src1.value as v1, SORT BY k1, v1, k2, v2 ) a; --- self-join + join condition (auto_join1.q) +-- self-join + join condition SELECT src1.key, src2.value FROM src src1 JOIN src src2 ON 
(src1.key = src2.key); --- equi inner join + inner join with a complex join condition (auto_join2.q) +-- equi inner join + inner join with a complex join condition SELECT src1.key, src3.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key + src2.key = src3.key); --- equi inner join + equi inner join (auto_join3.q) +-- equi inner join + equi inner join SELECT src1.key, src3.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) JOIN src src3 ON (src1.key = src3.key); --- inner join + join condition + filter (auto_join9.q) +-- inner join + join condition + filter SELECT src1.key, src2.value FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) WHERE src1.ds = '2008-04-08' and src1.hr = '12'; --- equi inner join + table.star expansion in nested table expression (auto_join10.q) +-- equi inner join + table.star expansion in nested table expression FROM (SELECT src.* FROM src) x JOIN @@ -36,7 +36,7 @@ JOIN ON (x.key = Y.key) select Y.key, Y.value; --- inner join with a complex join condition over nested table expressions (auto_join11.q) +-- inner join with a complex join condition over nested table expressions SELECT src1.c1, src2.c4 FROM (SELECT src.key as c1, src.value as c2 from src) src1 @@ -44,7 +44,7 @@ FROM (SELECT src.key as c3, src.value as c4 from src) src2 ON src1.c1 = src2.c3 AND src1.c1 < 200; --- two inner join with a complex join condition over nested table expressions (auto_join12.q) +-- two inner join with a complex join condition over nested table expressions SELECT src1.c1, src2.c4 FROM (SELECT src.key as c1, src.value as c2 from src) src1 @@ -55,7 +55,7 @@ FROM (SELECT src.key as c5, src.value as c6 from src) src3 ON src1.c1 = src3.c5 AND src3.c5 < 100; --- two inner join with a complex join condition over nested table expressions (auto_join13.q) +-- two inner join with a complex join condition over nested table expressions SELECT src1.c1, src2.c4 FROM (SELECT src.key as c1, src.value as c2 from src) src1 @@ 
-66,11 +66,11 @@ FROM (SELECT src.key as c5, src.value as c6 from src) src3 ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 400; --- join two different tables (auto_join14.q) +-- join two different tables FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 200 SELECT src.key, srcpart.value; --- join + sort by (auto_join15.q) +-- join + sort by SELECT a.k1, a.v1, a.k2, a.v2 FROM ( SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 @@ -78,7 +78,7 @@ SELECT a.k1, a.v1, a.k2, a.v2 SORT BY k1, v1, k2, v2 ) a; --- inner join with a filter above join and a filter below join (auto_join16.q) +-- inner join with a filter above join and a filter below join SELECT subq.key, tab.value FROM (select a.key, a.value from src a where a.key > 100 ) subq @@ -86,16 +86,16 @@ JOIN src tab ON (subq.key = tab.key and subq.key > 150 and subq.value = tab.value) where tab.key < 200; --- star expansion in nested table expression (auto_join17.q) +-- star expansion in nested table expression SELECT src1.*, src2.* FROM src src1 JOIN src src2 ON (src1.key = src2.key); --- join + disjunctive conditions (auto_join19.q) +-- join + disjunctive conditions SELECT src1.key, src2.value FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11'); --- nested join (auto_join22.q) +-- nested join SELECT src5.src1_value FROM (SELECT src3.*, src4.value as src4_value, src4.key as src4_key @@ -105,19 +105,19 @@ FROM JOIN src src2 ON src1.key = src2.key) src3 ON src3.src1_key = src4.key) src5; --- Cartesian join (auto_join23.q) +-- Cartesian join SELECT * FROM src src1 JOIN src src2 WHERE src1.key < 200 and src2.key < 200 SORT BY src1.key, src1.value, src2.key, src2.value; --- join (auto_join24.q) +-- join WITH tst1 AS (SELECT a.key, count(1) as cnt FROM src a group by a.key) SELECT sum(a.cnt) FROM tst1 a JOIN tst1 b ON a.key = b.key; --- aggregate over join 
results (auto_join26.q) +-- aggregate over join results SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key order by x.key; --- join over set operation over aggregate (auto_join27.q) +-- join over set operation over aggregate SELECT count(1) FROM ( @@ -131,7 +131,7 @@ JOIN ) src3 ON src_12.key = src3.k AND src3.k < 300; --- inner join with sorted by nested table expression (auto_join30.q) +-- inner join with sorted by nested table expression FROM (SELECT src.* FROM src sort by key) x JOIN @@ -139,7 +139,7 @@ JOIN ON (x.key = Y.key) select Y.key,Y.value; --- inner + inner with sorted by nested table expression (auto_join30.q) +-- inner + inner with sorted by nested table expression FROM (SELECT src.* FROM src sort by key) x JOIN @@ -150,7 +150,7 @@ JOIN ON (x.key = Z.key) select Y.key,Y.value; --- join over set operation (join34.q) +-- join over set operation SELECT x.key, x.value, subq1.value FROM ( SELECT x.key as key, x.value as value from src x where x.key < 200 @@ -159,7 +159,7 @@ FROM ) subq1 JOIN src1 x ON (x.key = subq1.key); --- join over set operation over aggregate (join35.q) +-- join over set operation over aggregate SELECT x.key, x.value, subq1.cnt FROM ( SELECT x.key as key, count(1) as cnt from src x where x.key < 200 group by x.key diff --git a/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql b/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql index f9be371e4e54c..056e835d982a1 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql @@ -11,7 +11,7 @@ SELECT * FROM lowercasedata l RIGHT OUTER JOIN uppercasedata u ON l.n = u.N; SELECT * FROM uppercasedata u LEFT OUTER JOIN lowercasedata l ON l.n = u.N; --- left-outer join over two nested table expressions (auto_join4.q) +-- left-outer join over two nested table expressions SELECT c.c1, c.c2, c.c3, c.c4 FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 @@ -26,7 
+26,7 @@ FROM ( ON (a.c1 = b.c3) ) c; --- right-outer join over two nested table expressions (auto_join5.q) +-- right-outer join over two nested table expressions SELECT c.c1, c.c2, c.c3, c.c4 FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 @@ -41,7 +41,7 @@ FROM ( ON (a.c1 = b.c3) ) c; --- full-outer join over two nested table expressions (auto_join6.q) +-- full-outer join over two nested table expressions SELECT c.c1, c.c2, c.c3, c.c4 FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 @@ -56,7 +56,7 @@ FROM ( ON (a.c1 = b.c3) ) c; --- full-outer join + left-outer join over nested table expressions (auto_join7.q) +-- full-outer join + left-outer join over nested table expressions SELECT c.c1, c.c2, c.c3, c.c4, c.c5, c.c6 FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 @@ -76,7 +76,7 @@ FROM ( ON (a.c1 = c.c5) ) c; --- left-outer join + join condition + filter (auto_join8.q) +-- left-outer join + join condition + filter SELECT c.c1, c.c2, c.c3, c.c4 FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 @@ -92,7 +92,7 @@ FROM ( ) c where c.c3 IS NULL AND c.c1 IS NOT NULL; --- full outer join over Aggregate (auto_join18.q) +-- full outer join over Aggregate SELECT a.key, a.value, b.key, b.value FROM ( @@ -105,7 +105,7 @@ FROM ) b ON (a.key = b.key); --- full outer join + multi distinct (auto_join18_multi_distinct.q) +-- full outer join + multi distinct SELECT a.key, a.value, b.key, b.value1, b.value2 FROM ( @@ -119,7 +119,7 @@ FROM ) b ON (a.key = b.key); --- inner join + right-outer join #1 (auto_join20.q) +-- inner join + right-outer join #1 SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 FROM ( SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 @@ -128,7 +128,7 @@ FROM ( SORT BY k1,v1,k2,v2,k3,v3 )a; --- inner join + right-outer join #2 (auto_join20.q) +-- inner join + right-outer join #2 SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 FROM ( SELECT src1.key as 
k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 @@ -137,7 +137,7 @@ FROM ( SORT BY k1,v1,k2,v2,k3,v3 )a; --- left outer join + right outer join (auto_join21.q) +-- left outer join + right outer join SELECT * FROM src src1 @@ -145,55 +145,55 @@ FROM RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; --- left outer + right outer (auto_join28.q) +-- left outer + right outer SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; --- right outer + right outer (auto_join28.q) +-- right outer + right outer SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; --- left outer + left outer (auto_join28.q) +-- left outer + left outer SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; --- right outer + left outer (auto_join28.q) +-- right outer + left outer SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; --- inner + left outer (auto_join29.q) +-- inner + left outer SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, 
src1.value, src2.key, src2.value, src3.key, src3.value; --- inner + right outer (auto_join29.q) +-- inner + right outer SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; --- left + inner outer (auto_join29.q) +-- left + inner outer SELECT * FROM src src1 LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; --- right + inner join (auto_join29.q) +-- right + inner join SELECT * FROM src src1 RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; --- left outer join with sorted by nested table expression (auto_join30.q) +-- left outer join with sorted by nested table expression FROM (SELECT src.* FROM src sort by key) x LEFT OUTER JOIN @@ -201,7 +201,7 @@ LEFT OUTER JOIN ON (x.key = Y.key) select Y.key,Y.value; --- right outer join with sorted by nested table expression (auto_join30.q) +-- right outer join with sorted by nested table expression FROM (SELECT src.* FROM src sort by key) x RIGHT OUTER JOIN @@ -209,7 +209,7 @@ RIGHT OUTER JOIN ON (x.key = Y.key) select Y.key,Y.value; --- inner + left outer with sorted by nested table expression (auto_join30.q) +-- inner + left outer with sorted by nested table expression FROM (SELECT src.* FROM src sort by key) x JOIN @@ -220,7 +220,7 @@ LEFT OUTER JOIN ON (x.key = Z.key) select Y.key,Y.value; --- left + left outer with sorted by nested table expression (auto_join30.q) +-- left + left outer with sorted by nested table expression FROM (SELECT src.* FROM src sort by key) x LEFT OUTER JOIN @@ -231,7 +231,7 
@@ LEFT OUTER JOIN ON (x.key = Z.key) select Y.key,Y.value; --- left + right outer with sorted by nested table expression (auto_join30.q) +-- left + right outer with sorted by nested table expression FROM (SELECT src.* FROM src sort by key) x LEFT OUTER JOIN @@ -242,7 +242,7 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select Y.key,Y.value; --- right + right outer with sorted by nested table expression (auto_join30.q) +-- right + right outer with sorted by nested table expression FROM (SELECT src.* FROM src sort by key) x RIGHT OUTER JOIN @@ -253,7 +253,7 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select Y.key,Y.value; --- right outer + inner with sorted by nested table expression (auto_join31.q) +-- right outer + inner with sorted by nested table expression FROM (SELECT src.* FROM src sort by key) x RIGHT OUTER JOIN From 111862698e90beaa2dcac287e74c3ce8b99fe1c4 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sun, 21 Aug 2016 20:08:26 -0700 Subject: [PATCH 22/27] change the table name for testData and testData2 --- .../resources/sql-tests/inputs/arithmetic.sql | 14 +++++----- .../test/resources/sql-tests/inputs/join.sql | 26 +++++++++---------- .../sql-tests/inputs/left-semi-join.sql | 12 ++++++--- .../test/resources/sql-tests/inputs/limit.sql | 14 +++++----- .../sql-tests/results/arithmetic.sql.out | 14 +++++----- .../resources/sql-tests/results/join.sql.out | 26 +++++++++---------- .../sql-tests/results/left-semi-join.sql.out | 12 ++++++--- .../resources/sql-tests/results/limit.sql.out | 16 ++++++------ .../apache/spark/sql/SQLQueryTestSuite.scala | 7 +++-- 9 files changed, 78 insertions(+), 63 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql b/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql index f62b10ca0037b..79815099c59a6 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/arithmetic.sql @@ -4,17 +4,17 @@ select -100; select +230; select -5.2; select 
+6.8e0; -select -key, +key from testdata where key = 2; -select -(key + 1), - key + 1, +(key + 5) from testdata where key = 1; -select -max(key), +max(key) from testdata; +select -key, +key from uniqueRowData where key = 2; +select -(key + 1), - key + 1, +(key + 5) from uniqueRowData where key = 1; +select -max(key), +max(key) from uniqueRowData; select - (-10); -select + (-key) from testdata where key = 32; -select - (+max(key)) from testdata; +select + (-key) from uniqueRowData where key = 32; +select - (+max(key)) from uniqueRowData; select - - 3; select - + 20; select + + 100; -select - - max(key) from testdata; -select + - key from testdata where key = 33; +select - - max(key) from uniqueRowData; +select + - key from uniqueRowData where key = 33; -- div select 5 / 2; diff --git a/sql/core/src/test/resources/sql-tests/inputs/join.sql b/sql/core/src/test/resources/sql-tests/inputs/join.sql index 5545b888315eb..87422271cb4c1 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/join.sql @@ -181,14 +181,14 @@ SELECT * FROM uppercasedata u JOIN lowercasedata l ON u.n = l.N; -- inner join with multiple-match-per-row filtering predicates (where) SELECT * FROM - (SELECT * FROM testdata2 WHERE a = 1) x JOIN - (SELECT * FROM testdata2 WHERE a = 1) y + (SELECT * FROM duplicateColumnValueData WHERE a = 1) x JOIN + (SELECT * FROM duplicateColumnValueData WHERE a = 1) y WHERE x.a = y.a; -- inner join with no-match-per-row filtering predicates (where) SELECT * FROM - (SELECT * FROM testData2 WHERE a = 1) x JOIN - (SELECT * FROM testData2 WHERE a = 2) y + (SELECT * FROM duplicateColumnValueData WHERE a = 1) x JOIN + (SELECT * FROM duplicateColumnValueData WHERE a = 2) y WHERE x.a = y.a; -- inner join ON with table name as qualifier @@ -200,19 +200,19 @@ SELECT upperCaseData.N, upperCaseData.L FROM upperCaseData JOIN lowerCaseData -- SPARK-4120 Join of multiple tables does not work in SparkSQL SELECT a.key, b.key, 
c.key -FROM testData a,testData b,testData c +FROM uniqueRowData a,uniqueRowData b,uniqueRowData c where a.key = b.key and a.key = c.key and a.key < 5; -- big inner join, 4 matches per row SELECT x.key, x.value, y.key, y.value, count(1) FROM - (SELECT * FROM testData UNION ALL - SELECT * FROM testData UNION ALL - SELECT * FROM testData UNION ALL - SELECT * FROM testData) x JOIN - (SELECT * FROM testData UNION ALL - SELECT * FROM testData UNION ALL - SELECT * FROM testData UNION ALL - SELECT * FROM testData) y + (SELECT * FROM uniqueRowData UNION ALL + SELECT * FROM uniqueRowData UNION ALL + SELECT * FROM uniqueRowData UNION ALL + SELECT * FROM uniqueRowData) x JOIN + (SELECT * FROM uniqueRowData UNION ALL + SELECT * FROM uniqueRowData UNION ALL + SELECT * FROM uniqueRowData UNION ALL + SELECT * FROM uniqueRowData) y WHERE x.key = y.key group by x.key, x.value, y.key, y.value; -- mixed-case keywords diff --git a/sql/core/src/test/resources/sql-tests/inputs/left-semi-join.sql b/sql/core/src/test/resources/sql-tests/inputs/left-semi-join.sql index adbfc859aa20f..a8d3fafc04d68 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/left-semi-join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/left-semi-join.sql @@ -1,8 +1,14 @@ -- left semi greater than predicate -SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.a >= y.a + 2; +SELECT * +FROM duplicateColumnValueData x LEFT SEMI JOIN duplicateColumnValueData y +ON x.a >= y.a + 2; -- left semi greater than predicate and equal operator #1 -SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.b = y.b and x.a >= y.a + 2; +SELECT * +FROM duplicateColumnValueData x LEFT SEMI JOIN duplicateColumnValueData y +ON x.b = y.b and x.a >= y.a + 2; -- left semi greater than predicate and equal operator #2 -SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.b = y.a and x.a >= y.b + 1; +SELECT * +FROM duplicateColumnValueData x LEFT SEMI JOIN duplicateColumnValueData y +ON x.b = y.a and x.a >= y.b + 1; 
diff --git a/sql/core/src/test/resources/sql-tests/inputs/limit.sql b/sql/core/src/test/resources/sql-tests/inputs/limit.sql index 2ea35f7f3a5c8..a37c631f015ef 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/limit.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/limit.sql @@ -1,23 +1,23 @@ -- limit on various data types -select * from testdata limit 2; +select * from uniqueRowData limit 2; select * from arraydata limit 2; select * from mapdata limit 2; -- foldable non-literal in limit -select * from testdata limit 2 + 1; +select * from uniqueRowData limit 2 + 1; -select * from testdata limit CAST(1 AS int); +select * from uniqueRowData limit CAST(1 AS int); -- limit must be non-negative -select * from testdata limit -1; +select * from uniqueRowData limit -1; -- limit must be foldable -select * from testdata limit key > 3; +select * from uniqueRowData limit key > 3; -- limit must be integer -select * from testdata limit true; -select * from testdata limit 'a'; +select * from uniqueRowData limit true; +select * from uniqueRowData limit 'a'; -- limit within a subquery select * from (select * from range(10) limit 5) where id > 3; diff --git a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out b/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out index 6abe048af477d..c1d3e6a898a5e 100644 --- a/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/arithmetic.sql.out @@ -35,7 +35,7 @@ struct<6.8:double> -- !query 4 -select -key, +key from testdata where key = 2 +select -key, +key from uniqueRowData where key = 2 -- !query 4 schema struct<(- key):int,key:int> -- !query 4 output @@ -43,7 +43,7 @@ struct<(- key):int,key:int> -- !query 5 -select -(key + 1), - key + 1, +(key + 5) from testdata where key = 1 +select -(key + 1), - key + 1, +(key + 5) from uniqueRowData where key = 1 -- !query 5 schema struct<(- (key + 1)):int,((- key) + 1):int,(key + 5):int> -- !query 5 
output @@ -51,7 +51,7 @@ struct<(- (key + 1)):int,((- key) + 1):int,(key + 5):int> -- !query 6 -select -max(key), +max(key) from testdata +select -max(key), +max(key) from uniqueRowData -- !query 6 schema struct<(- max(key)):int,max(key):int> -- !query 6 output @@ -67,7 +67,7 @@ struct<(- -10):int> -- !query 8 -select + (-key) from testdata where key = 32 +select + (-key) from uniqueRowData where key = 32 -- !query 8 schema struct<(- key):int> -- !query 8 output @@ -75,7 +75,7 @@ struct<(- key):int> -- !query 9 -select - (+max(key)) from testdata +select - (+max(key)) from uniqueRowData -- !query 9 schema struct<(- max(key)):int> -- !query 9 output @@ -107,7 +107,7 @@ struct<100:int> -- !query 13 -select - - max(key) from testdata +select - - max(key) from uniqueRowData -- !query 13 schema struct<(- (- max(key))):int> -- !query 13 output @@ -115,7 +115,7 @@ struct<(- (- max(key))):int> -- !query 14 -select + - key from testdata where key = 33 +select + - key from uniqueRowData where key = 33 -- !query 14 schema struct<(- key):int> -- !query 14 output diff --git a/sql/core/src/test/resources/sql-tests/results/join.sql.out b/sql/core/src/test/resources/sql-tests/results/join.sql.out index edfe224099c8d..5f3c653553167 100644 --- a/sql/core/src/test/resources/sql-tests/results/join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/join.sql.out @@ -465,8 +465,8 @@ struct -- !query 26 SELECT * FROM - (SELECT * FROM testdata2 WHERE a = 1) x JOIN - (SELECT * FROM testdata2 WHERE a = 1) y + (SELECT * FROM duplicateColumnValueData WHERE a = 1) x JOIN + (SELECT * FROM duplicateColumnValueData WHERE a = 1) y WHERE x.a = y.a -- !query 26 schema struct @@ -479,8 +479,8 @@ struct -- !query 27 SELECT * FROM - (SELECT * FROM testData2 WHERE a = 1) x JOIN - (SELECT * FROM testData2 WHERE a = 2) y + (SELECT * FROM duplicateColumnValueData WHERE a = 1) x JOIN + (SELECT * FROM duplicateColumnValueData WHERE a = 2) y WHERE x.a = y.a -- !query 27 schema struct @@ -513,7 +513,7 
@@ struct -- !query 30 SELECT a.key, b.key, c.key -FROM testData a,testData b,testData c +FROM uniqueRowData a,uniqueRowData b,uniqueRowData c where a.key = b.key and a.key = c.key and a.key < 5 -- !query 30 schema struct @@ -526,14 +526,14 @@ struct -- !query 31 SELECT x.key, x.value, y.key, y.value, count(1) FROM - (SELECT * FROM testData UNION ALL - SELECT * FROM testData UNION ALL - SELECT * FROM testData UNION ALL - SELECT * FROM testData) x JOIN - (SELECT * FROM testData UNION ALL - SELECT * FROM testData UNION ALL - SELECT * FROM testData UNION ALL - SELECT * FROM testData) y + (SELECT * FROM uniqueRowData UNION ALL + SELECT * FROM uniqueRowData UNION ALL + SELECT * FROM uniqueRowData UNION ALL + SELECT * FROM uniqueRowData) x JOIN + (SELECT * FROM uniqueRowData UNION ALL + SELECT * FROM uniqueRowData UNION ALL + SELECT * FROM uniqueRowData UNION ALL + SELECT * FROM uniqueRowData) y WHERE x.key = y.key group by x.key, x.value, y.key, y.value -- !query 31 schema struct diff --git a/sql/core/src/test/resources/sql-tests/results/left-semi-join.sql.out b/sql/core/src/test/resources/sql-tests/results/left-semi-join.sql.out index 99c0a8586b31e..7f668033a794d 100644 --- a/sql/core/src/test/resources/sql-tests/results/left-semi-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/left-semi-join.sql.out @@ -3,7 +3,9 @@ -- !query 0 -SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.a >= y.a + 2 +SELECT * +FROM duplicateColumnValueData x LEFT SEMI JOIN duplicateColumnValueData y +ON x.a >= y.a + 2 -- !query 0 schema struct -- !query 0 output @@ -12,7 +14,9 @@ struct -- !query 1 -SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.b = y.b and x.a >= y.a + 2 +SELECT * +FROM duplicateColumnValueData x LEFT SEMI JOIN duplicateColumnValueData y +ON x.b = y.b and x.a >= y.a + 2 -- !query 1 schema struct -- !query 1 output @@ -21,7 +25,9 @@ struct -- !query 2 -SELECT * FROM testData2 x LEFT SEMI JOIN testData2 y ON x.b = y.a and x.a >= y.b + 1 
+SELECT * +FROM duplicateColumnValueData x LEFT SEMI JOIN duplicateColumnValueData y +ON x.b = y.a and x.a >= y.b + 1 -- !query 2 schema struct -- !query 2 output diff --git a/sql/core/src/test/resources/sql-tests/results/limit.sql.out b/sql/core/src/test/resources/sql-tests/results/limit.sql.out index cb4e4d04810d0..e3d1a8bc87176 100644 --- a/sql/core/src/test/resources/sql-tests/results/limit.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/limit.sql.out @@ -3,7 +3,7 @@ -- !query 0 -select * from testdata limit 2 +select * from uniqueRowData limit 2 -- !query 0 schema struct -- !query 0 output @@ -30,7 +30,7 @@ struct> -- !query 3 -select * from testdata limit 2 + 1 +select * from uniqueRowData limit 2 + 1 -- !query 3 schema struct -- !query 3 output @@ -40,7 +40,7 @@ struct -- !query 4 -select * from testdata limit CAST(1 AS int) +select * from uniqueRowData limit CAST(1 AS int) -- !query 4 schema struct -- !query 4 output @@ -48,7 +48,7 @@ struct -- !query 5 -select * from testdata limit -1 +select * from uniqueRowData limit -1 -- !query 5 schema struct<> -- !query 5 output @@ -57,16 +57,16 @@ The limit expression must be equal to or greater than 0, but got -1; -- !query 6 -select * from testdata limit key > 3 +select * from uniqueRowData limit key > 3 -- !query 6 schema struct<> -- !query 6 output org.apache.spark.sql.AnalysisException -The limit expression must evaluate to a constant value, but got (testdata.`key` > 3); +The limit expression must evaluate to a constant value, but got (uniquerowdata.`key` > 3); -- !query 7 -select * from testdata limit true +select * from uniqueRowData limit true -- !query 7 schema struct<> -- !query 7 output @@ -75,7 +75,7 @@ The limit expression must be integer type, but got boolean; -- !query 8 -select * from testdata limit 'a' +select * from uniqueRowData limit 'a' -- !query 8 schema struct<> -- !query 8 output diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 61360dbcae116..9f4eb5e0897fc 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -243,11 +243,14 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { private def loadTestData(session: SparkSession): Unit = { import session.implicits._ - (1 to 100).map(i => (i, i.toString)).toDF("key", "value").createOrReplaceTempView("testdata") + // All column values are unique + (1 to 100).map(i => (i, i.toString)).toDF("key", "value") + .createOrReplaceTempView("uniqueRowData") + // Each column has duplicate values, but all the rows are unique Seq((1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)) .toDF("a", "b") - .createOrReplaceTempView("testData2") + .createOrReplaceTempView("duplicateColumnValueData") ((Seq(1, 2, 3), Seq(Seq(1, 2, 3))) :: (Seq(2, 3, 4), Seq(Seq(2, 3, 4))) :: Nil) .toDF("arraycol", "nestedarraycol") From c73134edd265fd09204726b09bb3f7dff1ad3747 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sun, 21 Aug 2016 20:28:09 -0700 Subject: [PATCH 23/27] rename src by duplicateRowData --- .../test/resources/sql-tests/inputs/join.sql | 91 +++++----- .../resources/sql-tests/inputs/outer-join.sql | 162 +++++++++++------- .../resources/sql-tests/results/join.sql.out | 91 +++++----- .../sql-tests/results/outer-join.sql.out | 162 +++++++++++------- .../apache/spark/sql/SQLQueryTestSuite.scala | 2 +- 5 files changed, 289 insertions(+), 219 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/join.sql b/sql/core/src/test/resources/sql-tests/inputs/join.sql index 87422271cb4c1..ba8cb9680b778 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/join.sql @@ -3,174 +3,177 @@ SELECT a.k1, a.v1, a.k2, a.v2 FROM ( SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 FROM - (SELECT *
FROM src WHERE src.key < 200) src1 + (SELECT * FROM duplicateRowData WHERE duplicateRowData.key < 200) src1 JOIN - (SELECT * FROM src WHERE src.key < 200) src2 + (SELECT * FROM duplicateRowData WHERE duplicateRowData.key < 200) src2 SORT BY k1, v1, k2, v2 ) a; -- self-join + join condition SELECT src1.key, src2.value -FROM src src1 JOIN src src2 ON (src1.key = src2.key); +FROM duplicateRowData src1 JOIN duplicateRowData src2 ON (src1.key = src2.key); -- equi inner join + inner join with a complex join condition SELECT src1.key, src3.value -FROM src src1 JOIN src src2 ON (src1.key = src2.key) - JOIN src src3 ON (src1.key + src2.key = src3.key); +FROM duplicateRowData src1 JOIN duplicateRowData src2 ON (src1.key = src2.key) + JOIN duplicateRowData src3 ON (src1.key + src2.key = src3.key); -- equi inner join + equi inner join SELECT src1.key, src3.value -FROM src src1 JOIN src src2 ON (src1.key = src2.key) - JOIN src src3 ON (src1.key = src3.key); +FROM duplicateRowData src1 JOIN duplicateRowData src2 ON (src1.key = src2.key) + JOIN duplicateRowData src3 ON (src1.key = src3.key); -- inner join + join condition + filter SELECT src1.key, src2.value -FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +FROM srcpart src1 JOIN duplicateRowData src2 ON (src1.key = src2.key) WHERE src1.ds = '2008-04-08' and src1.hr = '12'; -- equi inner join + table.star expansion in nested table expression FROM -(SELECT src.* FROM src) x +(SELECT duplicateRowData.* from duplicateRowData) x JOIN -(SELECT src.* FROM src) Y +(SELECT duplicateRowData.* from duplicateRowData) Y ON (x.key = Y.key) select Y.key, Y.value; -- inner join with a complex join condition over nested table expressions SELECT src1.c1, src2.c4 FROM - (SELECT src.key as c1, src.value as c2 from src) src1 + (SELECT duplicateRowData.key as c1, duplicateRowData.value as c2 from duplicateRowData) src1 JOIN - (SELECT src.key as c3, src.value as c4 from src) src2 + (SELECT duplicateRowData.key as c3, duplicateRowData.value as 
c4 from duplicateRowData) src2 ON src1.c1 = src2.c3 AND src1.c1 < 200; -- two inner join with a complex join condition over nested table expressions SELECT src1.c1, src2.c4 FROM - (SELECT src.key as c1, src.value as c2 from src) src1 + (SELECT duplicateRowData.key as c1, duplicateRowData.value as c2 from duplicateRowData) src1 JOIN - (SELECT src.key as c3, src.value as c4 from src) src2 + (SELECT duplicateRowData.key as c3, duplicateRowData.value as c4 from duplicateRowData) src2 ON src1.c1 = src2.c3 AND src1.c1 < 200 JOIN - (SELECT src.key as c5, src.value as c6 from src) src3 + (SELECT duplicateRowData.key as c5, duplicateRowData.value as c6 from duplicateRowData) src3 ON src1.c1 = src3.c5 AND src3.c5 < 100; -- two inner join with a complex join condition over nested table expressions SELECT src1.c1, src2.c4 FROM - (SELECT src.key as c1, src.value as c2 from src) src1 + (SELECT duplicateRowData.key as c1, duplicateRowData.value as c2 from duplicateRowData) src1 JOIN - (SELECT src.key as c3, src.value as c4 from src) src2 + (SELECT duplicateRowData.key as c3, duplicateRowData.value as c4 from duplicateRowData) src2 ON src1.c1 = src2.c3 AND src1.c1 < 250 JOIN - (SELECT src.key as c5, src.value as c6 from src) src3 + (SELECT duplicateRowData.key as c5, duplicateRowData.value as c6 from duplicateRowData) src3 ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 400; -- join two different tables -FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 200 -SELECT src.key, srcpart.value; +FROM duplicateRowData JOIN srcpart +ON duplicateRowData.key = srcpart.key AND srcpart.ds = '2008-04-08' and duplicateRowData.key > 200 +SELECT duplicateRowData.key, srcpart.value; -- join + sort by SELECT a.k1, a.v1, a.k2, a.v2 FROM ( SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 - FROM src src1 JOIN src src2 ON (src1.key = src2.key) + FROM duplicateRowData src1 JOIN duplicateRowData src2 ON (src1.key = src2.key) SORT BY k1, v1, 
k2, v2 ) a; -- inner join with a filter above join and a filter below join SELECT subq.key, tab.value FROM -(select a.key, a.value from src a where a.key > 100 ) subq -JOIN src tab +(select a.key, a.value from duplicateRowData a where a.key > 100 ) subq +JOIN duplicateRowData tab ON (subq.key = tab.key and subq.key > 150 and subq.value = tab.value) where tab.key < 200; -- star expansion in nested table expression SELECT src1.*, src2.* -FROM src src1 JOIN src src2 ON (src1.key = src2.key); +FROM duplicateRowData src1 JOIN duplicateRowData src2 ON (src1.key = src2.key); -- join + disjunctive conditions SELECT src1.key, src2.value -FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +FROM srcpart src1 JOIN duplicateRowData src2 ON (src1.key = src2.key) where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11'); -- nested join SELECT src5.src1_value FROM (SELECT src3.*, src4.value as src4_value, src4.key as src4_key - FROM src src4 + FROM duplicateRowData src4 JOIN (SELECT src2.*, src1.key as src1_key, src1.value as src1_value - FROM src src1 - JOIN src src2 ON src1.key = src2.key) src3 + FROM duplicateRowData src1 + JOIN duplicateRowData src2 ON src1.key = src2.key) src3 ON src3.src1_key = src4.key) src5; -- Cartesian join -SELECT * FROM src src1 JOIN src src2 +SELECT * FROM duplicateRowData src1 JOIN duplicateRowData src2 WHERE src1.key < 200 and src2.key < 200 SORT BY src1.key, src1.value, src2.key, src2.value; -- join -WITH tst1 AS (SELECT a.key, count(1) as cnt FROM src a group by a.key) +WITH tst1 AS (SELECT a.key, count(1) as cnt FROM duplicateRowData a group by a.key) SELECT sum(a.cnt) FROM tst1 a JOIN tst1 b ON a.key = b.key; -- aggregate over join results -SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) group by x.key order by x.key; +SELECT x.key, count(1) +FROM src1 x JOIN duplicateRowData y +ON (x.key = y.key) group by x.key order by x.key; -- join over set operation over aggregate SELECT count(1) 
FROM ( - SELECT src.key, src.value from src + SELECT duplicateRowData.key, duplicateRowData.value from duplicateRowData UNION ALL - SELECT DISTINCT src.key, src.value from src + SELECT DISTINCT duplicateRowData.key, duplicateRowData.value from duplicateRowData ) src_12 JOIN ( - SELECT src.key as k, src.value as v from src + SELECT duplicateRowData.key as k, duplicateRowData.value as v from duplicateRowData ) src3 ON src_12.key = src3.k AND src3.k < 300; -- inner join with sorted by nested table expression FROM -(SELECT src.* FROM src sort by key) x +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x JOIN -(SELECT src.* FROM src sort by value) Y +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y ON (x.key = Y.key) select Y.key,Y.value; -- inner + inner with sorted by nested table expression FROM -(SELECT src.* FROM src sort by key) x +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x JOIN -(SELECT src.* FROM src sort by value) Y +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y ON (x.key = Y.key) JOIN -(SELECT src.* FROM src sort by value) Z +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z ON (x.key = Z.key) select Y.key,Y.value; -- join over set operation SELECT x.key, x.value, subq1.value FROM -( SELECT x.key as key, x.value as value from src x where x.key < 200 +( SELECT x.key as key, x.value as value from duplicateRowData x where x.key < 200 UNION ALL - SELECT x1.key as key, x1.value as value from src x1 where x1.key > 100 + SELECT x1.key as key, x1.value as value from duplicateRowData x1 where x1.key > 100 ) subq1 JOIN src1 x ON (x.key = subq1.key); -- join over set operation over aggregate SELECT x.key, x.value, subq1.cnt FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 200 group by x.key +( SELECT x.key as key, count(1) as cnt from duplicateRowData x where x.key < 200 group by x.key UNION ALL - SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 
group by x1.key + SELECT x1.key as key, count(1) as cnt from duplicateRowData x1 where x1.key > 100 group by x1.key ) subq1 JOIN src1 x ON (x.key = subq1.key); -- self join with aliases SELECT x.key, COUNT(*) -FROM src x JOIN src y ON x.key = y.key +FROM duplicateRowData x JOIN duplicateRowData y ON x.key = y.key GROUP BY x.key; -- inner join with one-match-per-row filtering predicates (where) diff --git a/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql b/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql index 056e835d982a1..adeec71a8a956 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql @@ -17,11 +17,13 @@ FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 FROM ( - SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 100 and src1.key < 300 ) a LEFT OUTER JOIN ( - SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 200 and src2.key < 400 ) b ON (a.c1 = b.c3) ) c; @@ -32,11 +34,13 @@ FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 FROM ( - SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 100 and src1.key < 300 ) a RIGHT OUTER JOIN ( - SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 200 and src2.key < 400 ) b ON (a.c1 = b.c3) ) c; @@ -47,11 +51,13 @@ FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 FROM ( - SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + SELECT 
src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 100 and src1.key < 300 ) a FULL OUTER JOIN ( - SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 200 and src2.key < 400 ) b ON (a.c1 = b.c3) ) c; @@ -62,16 +68,19 @@ FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 FROM ( - SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 10 and src1.key < 150 + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 10 and src1.key < 150 ) a FULL OUTER JOIN ( - SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 150 and src2.key < 300 + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 150 and src2.key < 300 ) b ON (a.c1 = b.c3) LEFT OUTER JOIN ( - SELECT src3.key AS c5, src3.value AS c6 FROM src src3 WHERE src3.key > 200 and src3.key < 400 + SELECT src3.key AS c5, src3.value AS c6 + FROM duplicateRowData src3 WHERE src3.key > 200 and src3.key < 400 ) c ON (a.c1 = c.c5) ) c; @@ -82,11 +91,13 @@ FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 FROM ( - SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 100 and src1.key < 300 ) a LEFT OUTER JOIN ( - SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 200 and src2.key < 400 ) b ON (a.c1 = b.c3) ) c @@ -96,7 +107,8 @@ where c.c3 IS NULL AND c.c1 IS NOT NULL; SELECT a.key, a.value, b.key, b.value FROM ( - SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + SELECT src1.key as key, count(src1.value) AS value + FROM duplicateRowData src1 
group by src1.key ) a FULL OUTER JOIN ( @@ -109,7 +121,8 @@ ON (a.key = b.key); SELECT a.key, a.value, b.key, b.value1, b.value2 FROM ( - SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + SELECT src1.key as key, count(src1.value) AS value + FROM duplicateRowData src1 group by src1.key ) a FULL OUTER JOIN ( @@ -122,144 +135,163 @@ ON (a.key = b.key); -- inner join + right-outer join #1 SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 FROM ( - SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 - FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 200) - RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 300) + SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, + src3.value as v3 + FROM duplicateRowData src1 JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200) + RIGHT OUTER JOIN duplicateRowData src3 ON (src1.key = src3.key AND src3.key < 300) SORT BY k1,v1,k2,v2,k3,v3 )a; -- inner join + right-outer join #2 SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 FROM ( - SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 - FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key < 100) - RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 300) + SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, + src3.value as v3 + FROM duplicateRowData src1 + JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key < 100) + RIGHT OUTER JOIN duplicateRowData src3 + ON (src1.key = src3.key AND src3.key < 300) SORT BY k1,v1,k2,v2,k3,v3 )a; -- left outer join + right outer join SELECT * FROM - src src1 - LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + 
duplicateRowData src1 + LEFT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; -- left outer + right outer -SELECT * FROM src src1 - LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) +SELECT * FROM duplicateRowData src1 + LEFT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; -- right outer + right outer -SELECT * FROM src src1 - RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) +SELECT * FROM duplicateRowData src1 + RIGHT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; -- left outer + left outer -SELECT * FROM src src1 - LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) +SELECT * FROM duplicateRowData src1 + LEFT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; -- right outer + left outer -SELECT * FROM src src1 - RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - LEFT OUTER JOIN 
src src3 ON (src2.key = src3.key AND src3.key < 200) +SELECT * FROM duplicateRowData src1 + RIGHT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; -- inner + left outer -SELECT * FROM src src1 - JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) +SELECT * FROM duplicateRowData src1 + JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN duplicateRowData src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; -- inner + right outer -SELECT * FROM src src1 - JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) +SELECT * FROM duplicateRowData src1 + JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN duplicateRowData src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; -- left + inner outer -SELECT * FROM src src1 - LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) +SELECT * FROM duplicateRowData src1 + LEFT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; -- right + inner join -SELECT * FROM src src1 - RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - JOIN src src3 ON (src2.key = src3.key AND 
src3.key < 200) +SELECT * FROM duplicateRowData src1 + RIGHT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value; -- left outer join with sorted by nested table expression FROM -(SELECT src.* FROM src sort by key) x +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x LEFT OUTER JOIN -(SELECT src.* FROM src sort by value) Y +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y ON (x.key = Y.key) select Y.key,Y.value; -- right outer join with sorted by nested table expression FROM -(SELECT src.* FROM src sort by key) x +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Y +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y ON (x.key = Y.key) select Y.key,Y.value; -- inner + left outer with sorted by nested table expression FROM -(SELECT src.* FROM src sort by key) x +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x JOIN -(SELECT src.* FROM src sort by value) Y +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y ON (x.key = Y.key) LEFT OUTER JOIN -(SELECT src.* FROM src sort by value) Z +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z ON (x.key = Z.key) select Y.key,Y.value; -- left + left outer with sorted by nested table expression FROM -(SELECT src.* FROM src sort by key) x +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x LEFT OUTER JOIN -(SELECT src.* FROM src sort by value) Y +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y ON (x.key = Y.key) LEFT OUTER JOIN -(SELECT src.* FROM src sort by value) Z +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z ON (x.key = Z.key) select Y.key,Y.value; -- left + right outer with sorted by nested table expression FROM -(SELECT src.* 
FROM src sort by key) x +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x LEFT OUTER JOIN -(SELECT src.* FROM src sort by value) Y +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y ON (x.key = Y.key) RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Z +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z ON (x.key = Z.key) select Y.key,Y.value; -- right + right outer with sorted by nested table expression FROM -(SELECT src.* FROM src sort by key) x +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Y +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y ON (x.key = Y.key) RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Z +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z ON (x.key = Z.key) select Y.key,Y.value; -- right outer + inner with sorted by nested table expression FROM -(SELECT src.* FROM src sort by key) x +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Y +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y ON (x.key = Y.key) JOIN -(SELECT src.* FROM src sort by value) Z +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z ON (x.key = Z.key) select Y.key,Y.value; diff --git a/sql/core/src/test/resources/sql-tests/results/join.sql.out b/sql/core/src/test/resources/sql-tests/results/join.sql.out index 5f3c653553167..80a8c392bf0fd 100644 --- a/sql/core/src/test/resources/sql-tests/results/join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/join.sql.out @@ -7,9 +7,9 @@ SELECT a.k1, a.v1, a.k2, a.v2 FROM ( SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 FROM - (SELECT * FROM src WHERE src.key < 200) src1 + (SELECT * FROM duplicateRowData WHERE duplicateRowData.key < 200) src1 JOIN - (SELECT * FROM src WHERE src.key < 200) src2 + (SELECT * FROM duplicateRowData WHERE 
duplicateRowData.key < 200) src2 SORT BY k1, v1, k2, v2 ) a -- !query 0 schema @@ -28,7 +28,7 @@ struct -- !query 1 SELECT src1.key, src2.value -FROM src src1 JOIN src src2 ON (src1.key = src2.key) +FROM duplicateRowData src1 JOIN duplicateRowData src2 ON (src1.key = src2.key) -- !query 1 schema struct -- !query 1 output @@ -43,8 +43,8 @@ struct -- !query 2 SELECT src1.key, src3.value -FROM src src1 JOIN src src2 ON (src1.key = src2.key) - JOIN src src3 ON (src1.key + src2.key = src3.key) +FROM duplicateRowData src1 JOIN duplicateRowData src2 ON (src1.key = src2.key) + JOIN duplicateRowData src3 ON (src1.key + src2.key = src3.key) -- !query 2 schema struct -- !query 2 output @@ -56,8 +56,8 @@ struct -- !query 3 SELECT src1.key, src3.value -FROM src src1 JOIN src src2 ON (src1.key = src2.key) - JOIN src src3 ON (src1.key = src3.key) +FROM duplicateRowData src1 JOIN duplicateRowData src2 ON (src1.key = src2.key) + JOIN duplicateRowData src3 ON (src1.key = src3.key) -- !query 3 schema struct -- !query 3 output @@ -76,7 +76,7 @@ struct -- !query 4 SELECT src1.key, src2.value -FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +FROM srcpart src1 JOIN duplicateRowData src2 ON (src1.key = src2.key) WHERE src1.ds = '2008-04-08' and src1.hr = '12' -- !query 4 schema struct @@ -92,9 +92,9 @@ struct -- !query 5 FROM -(SELECT src.* FROM src) x +(SELECT duplicateRowData.* from duplicateRowData) x JOIN -(SELECT src.* FROM src) Y +(SELECT duplicateRowData.* from duplicateRowData) Y ON (x.key = Y.key) select Y.key, Y.value -- !query 5 schema @@ -112,9 +112,9 @@ struct -- !query 6 SELECT src1.c1, src2.c4 FROM - (SELECT src.key as c1, src.value as c2 from src) src1 + (SELECT duplicateRowData.key as c1, duplicateRowData.value as c2 from duplicateRowData) src1 JOIN - (SELECT src.key as c3, src.value as c4 from src) src2 + (SELECT duplicateRowData.key as c3, duplicateRowData.value as c4 from duplicateRowData) src2 ON src1.c1 = src2.c3 AND src1.c1 < 200 -- !query 6 schema struct 
@@ -129,12 +129,12 @@ struct -- !query 7 SELECT src1.c1, src2.c4 FROM - (SELECT src.key as c1, src.value as c2 from src) src1 + (SELECT duplicateRowData.key as c1, duplicateRowData.value as c2 from duplicateRowData) src1 JOIN - (SELECT src.key as c3, src.value as c4 from src) src2 + (SELECT duplicateRowData.key as c3, duplicateRowData.value as c4 from duplicateRowData) src2 ON src1.c1 = src2.c3 AND src1.c1 < 200 JOIN - (SELECT src.key as c5, src.value as c6 from src) src3 + (SELECT duplicateRowData.key as c5, duplicateRowData.value as c6 from duplicateRowData) src3 ON src1.c1 = src3.c5 AND src3.c5 < 100 -- !query 7 schema struct @@ -145,12 +145,12 @@ struct -- !query 8 SELECT src1.c1, src2.c4 FROM - (SELECT src.key as c1, src.value as c2 from src) src1 + (SELECT duplicateRowData.key as c1, duplicateRowData.value as c2 from duplicateRowData) src1 JOIN - (SELECT src.key as c3, src.value as c4 from src) src2 + (SELECT duplicateRowData.key as c3, duplicateRowData.value as c4 from duplicateRowData) src2 ON src1.c1 = src2.c3 AND src1.c1 < 250 JOIN - (SELECT src.key as c5, src.value as c6 from src) src3 + (SELECT duplicateRowData.key as c5, duplicateRowData.value as c6 from duplicateRowData) src3 ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 400 -- !query 8 schema struct @@ -162,8 +162,9 @@ struct -- !query 9 -FROM src JOIN srcpart ON src.key = srcpart.key AND srcpart.ds = '2008-04-08' and src.key > 200 -SELECT src.key, srcpart.value +FROM duplicateRowData JOIN srcpart +ON duplicateRowData.key = srcpart.key AND srcpart.ds = '2008-04-08' and duplicateRowData.key > 200 +SELECT duplicateRowData.key, srcpart.value -- !query 9 schema struct -- !query 9 output @@ -177,7 +178,7 @@ struct SELECT a.k1, a.v1, a.k2, a.v2 FROM ( SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 - FROM src src1 JOIN src src2 ON (src1.key = src2.key) + FROM duplicateRowData src1 JOIN duplicateRowData src2 ON (src1.key = src2.key) SORT BY k1, v1, k2, v2 ) a -- !query 10 schema @@ 
-195,8 +196,8 @@ struct -- !query 11 SELECT subq.key, tab.value FROM -(select a.key, a.value from src a where a.key > 100 ) subq -JOIN src tab +(select a.key, a.value from duplicateRowData a where a.key > 100 ) subq +JOIN duplicateRowData tab ON (subq.key = tab.key and subq.key > 150 and subq.value = tab.value) where tab.key < 200 -- !query 11 schema @@ -210,7 +211,7 @@ struct -- !query 12 SELECT src1.*, src2.* -FROM src src1 JOIN src src2 ON (src1.key = src2.key) +FROM duplicateRowData src1 JOIN duplicateRowData src2 ON (src1.key = src2.key) -- !query 12 schema struct -- !query 12 output @@ -225,7 +226,7 @@ struct -- !query 13 SELECT src1.key, src2.value -FROM srcpart src1 JOIN src src2 ON (src1.key = src2.key) +FROM srcpart src1 JOIN duplicateRowData src2 ON (src1.key = src2.key) where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') -- !query 13 schema struct @@ -264,10 +265,10 @@ struct SELECT src5.src1_value FROM (SELECT src3.*, src4.value as src4_value, src4.key as src4_key - FROM src src4 + FROM duplicateRowData src4 JOIN (SELECT src2.*, src1.key as src1_key, src1.value as src1_value - FROM src src1 - JOIN src src2 ON src1.key = src2.key) src3 + FROM duplicateRowData src1 + JOIN duplicateRowData src2 ON src1.key = src2.key) src3 ON src3.src1_key = src4.key) src5 -- !query 14 schema struct @@ -286,7 +287,7 @@ val_86 -- !query 15 -SELECT * FROM src src1 JOIN src src2 +SELECT * FROM duplicateRowData src1 JOIN duplicateRowData src2 WHERE src1.key < 200 and src2.key < 200 SORT BY src1.key, src1.value, src2.key, src2.value -- !query 15 schema @@ -304,7 +305,7 @@ struct -- !query 16 -WITH tst1 AS (SELECT a.key, count(1) as cnt FROM src a group by a.key) +WITH tst1 AS (SELECT a.key, count(1) as cnt FROM duplicateRowData a group by a.key) SELECT sum(a.cnt) FROM tst1 a JOIN tst1 b ON a.key = b.key -- !query 16 schema struct @@ -313,7 +314,9 @@ struct -- !query 17 -SELECT x.key, count(1) FROM src1 x JOIN src y ON (x.key = y.key) 
group by x.key order by x.key +SELECT x.key, count(1) +FROM src1 x JOIN duplicateRowData y +ON (x.key = y.key) group by x.key order by x.key -- !query 17 schema struct -- !query 17 output @@ -326,13 +329,13 @@ struct SELECT count(1) FROM ( - SELECT src.key, src.value from src + SELECT duplicateRowData.key, duplicateRowData.value from duplicateRowData UNION ALL - SELECT DISTINCT src.key, src.value from src + SELECT DISTINCT duplicateRowData.key, duplicateRowData.value from duplicateRowData ) src_12 JOIN ( - SELECT src.key as k, src.value as v from src + SELECT duplicateRowData.key as k, duplicateRowData.value as v from duplicateRowData ) src3 ON src_12.key = src3.k AND src3.k < 300 -- !query 18 schema @@ -343,9 +346,9 @@ struct -- !query 19 FROM -(SELECT src.* FROM src sort by key) x +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x JOIN -(SELECT src.* FROM src sort by value) Y +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y ON (x.key = Y.key) select Y.key,Y.value -- !query 19 schema @@ -362,12 +365,12 @@ struct -- !query 20 FROM -(SELECT src.* FROM src sort by key) x +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x JOIN -(SELECT src.* FROM src sort by value) Y +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y ON (x.key = Y.key) JOIN -(SELECT src.* FROM src sort by value) Z +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z ON (x.key = Z.key) select Y.key,Y.value -- !query 20 schema @@ -389,9 +392,9 @@ struct -- !query 21 SELECT x.key, x.value, subq1.value FROM -( SELECT x.key as key, x.value as value from src x where x.key < 200 +( SELECT x.key as key, x.value as value from duplicateRowData x where x.key < 200 UNION ALL - SELECT x1.key as key, x1.value as value from src x1 where x1.key > 100 + SELECT x1.key as key, x1.value as value from duplicateRowData x1 where x1.key > 100 ) subq1 JOIN src1 x ON (x.key = subq1.key) -- !query 21 schema @@ -412,9 +415,9 @@ struct -- !query 22 SELECT 
x.key, x.value, subq1.cnt FROM -( SELECT x.key as key, count(1) as cnt from src x where x.key < 200 group by x.key +( SELECT x.key as key, count(1) as cnt from duplicateRowData x where x.key < 200 group by x.key UNION ALL - SELECT x1.key as key, count(1) as cnt from src x1 where x1.key > 100 group by x1.key + SELECT x1.key as key, count(1) as cnt from duplicateRowData x1 where x1.key > 100 group by x1.key ) subq1 JOIN src1 x ON (x.key = subq1.key) -- !query 22 schema @@ -430,7 +433,7 @@ struct -- !query 23 SELECT x.key, COUNT(*) -FROM src x JOIN src y ON x.key = y.key +FROM duplicateRowData x JOIN duplicateRowData y ON x.key = y.key GROUP BY x.key -- !query 23 schema struct diff --git a/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out b/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out index 4bb6f56e58cc7..9892bde2fdb70 100644 --- a/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out @@ -50,11 +50,13 @@ FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 FROM ( - SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 100 and src1.key < 300 ) a LEFT OUTER JOIN ( - SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 200 and src2.key < 400 ) b ON (a.c1 = b.c3) ) c @@ -72,11 +74,13 @@ FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 FROM ( - SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 100 and src1.key < 300 ) a RIGHT OUTER JOIN ( - SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + SELECT src2.key AS c3, 
src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 200 and src2.key < 400 ) b ON (a.c1 = b.c3) ) c @@ -93,11 +97,13 @@ FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 FROM ( - SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 100 and src1.key < 300 ) a FULL OUTER JOIN ( - SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 200 and src2.key < 400 ) b ON (a.c1 = b.c3) ) c @@ -116,16 +122,19 @@ FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4, c.c5 AS c5, c.c6 AS c6 FROM ( - SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 10 and src1.key < 150 + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 10 and src1.key < 150 ) a FULL OUTER JOIN ( - SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 150 and src2.key < 300 + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 150 and src2.key < 300 ) b ON (a.c1 = b.c3) LEFT OUTER JOIN ( - SELECT src3.key AS c5, src3.value AS c6 FROM src src3 WHERE src3.key > 200 and src3.key < 400 + SELECT src3.key AS c5, src3.value AS c6 + FROM duplicateRowData src3 WHERE src3.key > 200 and src3.key < 400 ) c ON (a.c1 = c.c5) ) c @@ -144,11 +153,13 @@ FROM ( SELECT a.c1 AS c1, a.c2 AS c2, b.c3 AS c3, b.c4 AS c4 FROM ( - SELECT src1.key AS c1, src1.value AS c2 FROM src src1 WHERE src1.key > 100 and src1.key < 300 + SELECT src1.key AS c1, src1.value AS c2 + FROM duplicateRowData src1 WHERE src1.key > 100 and src1.key < 300 ) a LEFT OUTER JOIN ( - SELECT src2.key AS c3, src2.value AS c4 FROM src src2 WHERE src2.key > 200 and src2.key < 400 + SELECT src2.key AS c3, src2.value AS c4 + FROM duplicateRowData src2 WHERE src2.key > 200 and src2.key 
< 400 ) b ON (a.c1 = b.c3) ) c @@ -164,7 +175,8 @@ struct SELECT a.key, a.value, b.key, b.value FROM ( - SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + SELECT src1.key as key, count(src1.value) AS value + FROM duplicateRowData src1 group by src1.key ) a FULL OUTER JOIN ( @@ -187,7 +199,8 @@ NULL NULL NULL 1 SELECT a.key, a.value, b.key, b.value1, b.value2 FROM ( - SELECT src1.key as key, count(src1.value) AS value FROM src src1 group by src1.key + SELECT src1.key as key, count(src1.value) AS value + FROM duplicateRowData src1 group by src1.key ) a FULL OUTER JOIN ( @@ -210,9 +223,10 @@ NULL NULL NULL 1 0 -- !query 10 SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 FROM ( - SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 - FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 200) - RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 300) + SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, + src3.value as v3 + FROM duplicateRowData src1 JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200) + RIGHT OUTER JOIN duplicateRowData src3 ON (src1.key = src3.key AND src3.key < 300) SORT BY k1,v1,k2,v2,k3,v3 )a -- !query 10 schema @@ -234,9 +248,13 @@ NULL NULL NULL NULL 330 val_330 -- !query 11 SELECT a.k1,a.v1,a.k2,a.v2,a.k3,a.v3 FROM ( - SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, src3.value as v3 - FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key < 100) - RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 300) + SELECT src1.key as k1, src1.value as v1, src2.key as k2, src2.value as v2 , src3.key as k3, + src3.value as v3 + FROM duplicateRowData src1 + JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key < 100) + RIGHT OUTER JOIN duplicateRowData src3 + ON (src1.key = src3.key 
AND src3.key < 300) SORT BY k1,v1,k2,v2,k3,v3 )a -- !query 11 schema @@ -252,9 +270,11 @@ NULL NULL NULL NULL 330 val_330 -- !query 12 SELECT * FROM - src src1 - LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) + duplicateRowData src1 + LEFT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value -- !query 12 schema struct @@ -267,9 +287,11 @@ NULL NULL NULL NULL 86 val_86 -- !query 13 -SELECT * FROM src src1 - LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) +SELECT * FROM duplicateRowData src1 + LEFT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value -- !query 13 schema struct @@ -282,9 +304,11 @@ NULL NULL NULL NULL 86 val_86 -- !query 14 -SELECT * FROM src src1 - RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) +SELECT * FROM duplicateRowData src1 + RIGHT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value -- !query 14 schema struct @@ -299,9 +323,11 @@ NULL NULL NULL NULL 330 val_330 -- !query 15 -SELECT * FROM src src1 - LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - LEFT OUTER JOIN src 
src3 ON (src2.key = src3.key AND src3.key < 200) +SELECT * FROM duplicateRowData src1 + LEFT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value -- !query 15 schema struct @@ -314,9 +340,11 @@ struct -- !query 16 -SELECT * FROM src src1 - RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) +SELECT * FROM duplicateRowData src1 + RIGHT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value -- !query 16 schema struct @@ -331,9 +359,9 @@ NULL NULL 86 val_86 86 val_86 -- !query 17 -SELECT * FROM src src1 - JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - LEFT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) +SELECT * FROM duplicateRowData src1 + JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + LEFT OUTER JOIN duplicateRowData src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value -- !query 17 schema struct @@ -342,9 +370,9 @@ struct -- !query 18 -SELECT * FROM src src1 - JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - RIGHT OUTER JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) +SELECT * FROM duplicateRowData src1 + JOIN duplicateRowData src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + RIGHT OUTER JOIN duplicateRowData src3 ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value -- !query 18 
schema struct @@ -357,9 +385,11 @@ NULL NULL NULL NULL 86 val_86 -- !query 19 -SELECT * FROM src src1 - LEFT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) +SELECT * FROM duplicateRowData src1 + LEFT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value -- !query 19 schema struct @@ -368,9 +398,11 @@ struct -- !query 20 -SELECT * FROM src src1 - RIGHT OUTER JOIN src src2 ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) - JOIN src src3 ON (src2.key = src3.key AND src3.key < 200) +SELECT * FROM duplicateRowData src1 + RIGHT OUTER JOIN duplicateRowData src2 + ON (src1.key = src2.key AND src1.key < 200 AND src2.key > 200) + JOIN duplicateRowData src3 + ON (src2.key = src3.key AND src3.key < 200) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value -- !query 20 schema struct @@ -384,9 +416,9 @@ NULL NULL 86 val_86 86 val_86 -- !query 21 FROM -(SELECT src.* FROM src sort by key) x +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x LEFT OUTER JOIN -(SELECT src.* FROM src sort by value) Y +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y ON (x.key = Y.key) select Y.key,Y.value -- !query 21 schema @@ -403,9 +435,9 @@ struct -- !query 22 FROM -(SELECT src.* FROM src sort by key) x +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Y +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y ON (x.key = Y.key) select Y.key,Y.value -- !query 22 schema @@ -422,12 +454,12 @@ struct -- !query 23 FROM -(SELECT src.* FROM src sort by key) x +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x JOIN -(SELECT src.* FROM src sort by value) Y +(SELECT 
duplicateRowData.* FROM duplicateRowData sort by value) Y ON (x.key = Y.key) LEFT OUTER JOIN -(SELECT src.* FROM src sort by value) Z +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z ON (x.key = Z.key) select Y.key,Y.value -- !query 23 schema @@ -448,12 +480,12 @@ struct -- !query 24 FROM -(SELECT src.* FROM src sort by key) x +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x LEFT OUTER JOIN -(SELECT src.* FROM src sort by value) Y +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y ON (x.key = Y.key) LEFT OUTER JOIN -(SELECT src.* FROM src sort by value) Z +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z ON (x.key = Z.key) select Y.key,Y.value -- !query 24 schema @@ -474,12 +506,12 @@ struct -- !query 25 FROM -(SELECT src.* FROM src sort by key) x +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x LEFT OUTER JOIN -(SELECT src.* FROM src sort by value) Y +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y ON (x.key = Y.key) RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Z +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z ON (x.key = Z.key) select Y.key,Y.value -- !query 25 schema @@ -500,12 +532,12 @@ struct -- !query 26 FROM -(SELECT src.* FROM src sort by key) x +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Y +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y ON (x.key = Y.key) RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Z +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z ON (x.key = Z.key) select Y.key,Y.value -- !query 26 schema @@ -526,12 +558,12 @@ struct -- !query 27 FROM -(SELECT src.* FROM src sort by key) x +(SELECT duplicateRowData.* FROM duplicateRowData sort by key) x RIGHT OUTER JOIN -(SELECT src.* FROM src sort by value) Y +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Y ON (x.key = Y.key) JOIN 
-(SELECT src.* FROM src sort by value) Z +(SELECT duplicateRowData.* FROM duplicateRowData sort by value) Z ON (x.key = Z.key) select Y.key,Y.value -- !query 27 schema diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 9f4eb5e0897fc..03278d18872f3 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -274,7 +274,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { Seq((251, "val_251"), (86, "val_86"), (165, "val_165"), (330, "val_330"), (165, "val_165")) .toDF("key", "value") - .createOrReplaceTempView("src") + .createOrReplaceTempView("duplicateRowData") session.sql( """ From a204f5244ad71e65889bea160b696ca870ef4fb7 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sun, 21 Aug 2016 20:34:25 -0700 Subject: [PATCH 24/27] rename src1 by nullData --- sql/core/src/test/resources/sql-tests/inputs/join.sql | 8 ++++---- .../src/test/resources/sql-tests/inputs/outer-join.sql | 4 ++-- .../src/test/resources/sql-tests/results/join.sql.out | 8 ++++---- .../test/resources/sql-tests/results/outer-join.sql.out | 4 ++-- .../scala/org/apache/spark/sql/SQLQueryTestSuite.scala | 4 ++-- 5 files changed, 14 insertions(+), 14 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/join.sql b/sql/core/src/test/resources/sql-tests/inputs/join.sql index ba8cb9680b778..2b5fdbc536d5f 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/join.sql @@ -117,7 +117,7 @@ SELECT sum(a.cnt) FROM tst1 a JOIN tst1 b ON a.key = b.key; -- aggregate over join results SELECT x.key, count(1) -FROM src1 x JOIN duplicateRowData y +FROM nullData x JOIN duplicateRowData y ON (x.key = y.key) group by x.key order by x.key; -- join over set operation over aggregate @@ -160,7 +160,7 @@ FROM UNION ALL SELECT 
x1.key as key, x1.value as value from duplicateRowData x1 where x1.key > 100 ) subq1 -JOIN src1 x ON (x.key = subq1.key); +JOIN nullData x ON (x.key = subq1.key); -- join over set operation over aggregate SELECT x.key, x.value, subq1.cnt @@ -169,7 +169,7 @@ FROM UNION ALL SELECT x1.key as key, count(1) as cnt from duplicateRowData x1 where x1.key > 100 group by x1.key ) subq1 -JOIN src1 x ON (x.key = subq1.key); +JOIN nullData x ON (x.key = subq1.key); -- self join with aliases SELECT x.key, COUNT(*) @@ -225,4 +225,4 @@ SeleCT * from oN leftTable.N = rightTable.N; -- Supporting relational operator '<=>' in Spark SQL -SELECT * FROM src1 as a JOIN src1 as b on a.value <=> b.value; +SELECT * FROM nullData as a JOIN nullData as b on a.value <=> b.value; diff --git a/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql b/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql index adeec71a8a956..8669545653ae8 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql @@ -113,7 +113,7 @@ FROM FULL OUTER JOIN ( SELECT src2.key as key, count(distinct(src2.value)) AS value - FROM src1 src2 group by src2.key + FROM nullData src2 group by src2.key ) b ON (a.key = b.key); @@ -128,7 +128,7 @@ FROM ( SELECT src2.key as key, count(distinct(src2.value)) AS value1, count(distinct(src2.key)) AS value2 - FROM src1 src2 group by src2.key + FROM nullData src2 group by src2.key ) b ON (a.key = b.key); diff --git a/sql/core/src/test/resources/sql-tests/results/join.sql.out b/sql/core/src/test/resources/sql-tests/results/join.sql.out index 80a8c392bf0fd..32fc28c9319a7 100644 --- a/sql/core/src/test/resources/sql-tests/results/join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/join.sql.out @@ -315,7 +315,7 @@ struct -- !query 17 SELECT x.key, count(1) -FROM src1 x JOIN duplicateRowData y +FROM nullData x JOIN duplicateRowData y ON (x.key = y.key) group by x.key order by x.key -- !query 
17 schema struct @@ -396,7 +396,7 @@ FROM UNION ALL SELECT x1.key as key, x1.value as value from duplicateRowData x1 where x1.key > 100 ) subq1 -JOIN src1 x ON (x.key = subq1.key) +JOIN nullData x ON (x.key = subq1.key) -- !query 21 schema struct -- !query 21 output @@ -419,7 +419,7 @@ FROM UNION ALL SELECT x1.key as key, count(1) as cnt from duplicateRowData x1 where x1.key > 100 group by x1.key ) subq1 -JOIN src1 x ON (x.key = subq1.key) +JOIN nullData x ON (x.key = subq1.key) -- !query 22 schema struct -- !query 22 output @@ -660,7 +660,7 @@ NULL NULL 6 F -- !query 33 -SELECT * FROM src1 as a JOIN src1 as b on a.value <=> b.value +SELECT * FROM nullData as a JOIN nullData as b on a.value <=> b.value -- !query 33 schema struct -- !query 33 output diff --git a/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out b/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out index 9892bde2fdb70..c478745cc2036 100644 --- a/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/outer-join.sql.out @@ -181,7 +181,7 @@ FROM FULL OUTER JOIN ( SELECT src2.key as key, count(distinct(src2.value)) AS value - FROM src1 src2 group by src2.key + FROM nullData src2 group by src2.key ) b ON (a.key = b.key) -- !query 8 schema @@ -206,7 +206,7 @@ FROM ( SELECT src2.key as key, count(distinct(src2.value)) AS value1, count(distinct(src2.key)) AS value2 - FROM src1 src2 group by src2.key + FROM nullData src2 group by src2.key ) b ON (a.key = b.key) -- !query 9 schema diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 03278d18872f3..1f7c3ad28b94d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -278,7 +278,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { session.sql( """ - 
|CREATE OR REPLACE TEMPORARY VIEW src1 AS SELECT * FROM VALUES + |CREATE OR REPLACE TEMPORARY VIEW nullData AS SELECT * FROM VALUES |(201, null), |(86, "val_86"), |(null, "val_null"), @@ -286,7 +286,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { |(null, null), |(330, "val_330"), |(165, null) - |as src1(key, value) + |as nullData(key, value) """.stripMargin) Seq((251, "val_251", "2008-04-08", "11"), From 046c3c37171b93f23cdcb5c51c6c55566f815a24 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sun, 21 Aug 2016 20:48:11 -0700 Subject: [PATCH 25/27] rename srcpart by partitionedData --- .../src/test/resources/sql-tests/inputs/join.sql | 11 ++++++----- .../test/resources/sql-tests/results/join.sql.out | 11 ++++++----- .../org/apache/spark/sql/SQLQueryTestSuite.scala | 13 ++++++++++--- 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/sql/core/src/test/resources/sql-tests/inputs/join.sql b/sql/core/src/test/resources/sql-tests/inputs/join.sql index 2b5fdbc536d5f..ecae4c00cb6f6 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/join.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/join.sql @@ -25,7 +25,7 @@ FROM duplicateRowData src1 JOIN duplicateRowData src2 ON (src1.key = src2.key) -- inner join + join condition + filter SELECT src1.key, src2.value -FROM srcpart src1 JOIN duplicateRowData src2 ON (src1.key = src2.key) +FROM partitionedData src1 JOIN duplicateRowData src2 ON (src1.key = src2.key) WHERE src1.ds = '2008-04-08' and src1.hr = '12'; -- equi inner join + table.star expansion in nested table expression @@ -67,9 +67,10 @@ FROM ON src1.c1 + src2.c3 = src3.c5 AND src3.c5 < 400; -- join two different tables -FROM duplicateRowData JOIN srcpart -ON duplicateRowData.key = srcpart.key AND srcpart.ds = '2008-04-08' and duplicateRowData.key > 200 -SELECT duplicateRowData.key, srcpart.value; +FROM duplicateRowData JOIN partitionedData +ON duplicateRowData.key = partitionedData.key AND partitionedData.ds = '2008-04-08' + 
AND duplicateRowData.key > 200 +SELECT duplicateRowData.key, partitionedData.value; -- join + sort by SELECT a.k1, a.v1, a.k2, a.v2 @@ -93,7 +94,7 @@ FROM duplicateRowData src1 JOIN duplicateRowData src2 ON (src1.key = src2.key); -- join + disjunctive conditions SELECT src1.key, src2.value -FROM srcpart src1 JOIN duplicateRowData src2 ON (src1.key = src2.key) +FROM partitionedData src1 JOIN duplicateRowData src2 ON (src1.key = src2.key) where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11'); -- nested join diff --git a/sql/core/src/test/resources/sql-tests/results/join.sql.out b/sql/core/src/test/resources/sql-tests/results/join.sql.out index 32fc28c9319a7..bef49c54ec74b 100644 --- a/sql/core/src/test/resources/sql-tests/results/join.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/join.sql.out @@ -76,7 +76,7 @@ struct -- !query 4 SELECT src1.key, src2.value -FROM srcpart src1 JOIN duplicateRowData src2 ON (src1.key = src2.key) +FROM partitionedData src1 JOIN duplicateRowData src2 ON (src1.key = src2.key) WHERE src1.ds = '2008-04-08' and src1.hr = '12' -- !query 4 schema struct @@ -162,9 +162,10 @@ struct -- !query 9 -FROM duplicateRowData JOIN srcpart -ON duplicateRowData.key = srcpart.key AND srcpart.ds = '2008-04-08' and duplicateRowData.key > 200 -SELECT duplicateRowData.key, srcpart.value +FROM duplicateRowData JOIN partitionedData +ON duplicateRowData.key = partitionedData.key AND partitionedData.ds = '2008-04-08' + AND duplicateRowData.key > 200 +SELECT duplicateRowData.key, partitionedData.value -- !query 9 schema struct -- !query 9 output @@ -226,7 +227,7 @@ struct -- !query 13 SELECT src1.key, src2.value -FROM srcpart src1 JOIN duplicateRowData src2 ON (src1.key = src2.key) +FROM partitionedData src1 JOIN duplicateRowData src2 ON (src1.key = src2.key) where (src1.ds = '2008-04-08' or src1.ds = '2008-04-09' )and (src1.hr = '12' or src1.hr = '11') -- !query 13 schema struct diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala index 1f7c3ad28b94d..29a45080bdc17 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala @@ -243,19 +243,21 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { private def loadTestData(session: SparkSession): Unit = { import session.implicits._ - // All column values are unique + // A data set containing non-duplicate column values (1 to 100).map(i => (i, i.toString)).toDF("key", "value") .createOrReplaceTempView("uniqueRowData") - // Each column have duplicate values, but all the rows are unique + // A data set containing duplicate values for each column (but all the rows are unique) Seq((1, 1), (1, 2), (2, 1), (2, 2), (3, 1), (3, 2)) .toDF("a", "b") .createOrReplaceTempView("duplicateColumnValueData") + // A data set containing a complex data type: ARRAY ((Seq(1, 2, 3), Seq(Seq(1, 2, 3))) :: (Seq(2, 3, 4), Seq(Seq(2, 3, 4))) :: Nil) .toDF("arraycol", "nestedarraycol") .createOrReplaceTempView("arraydata") + // A data set containing a complex data type: MAP (Tuple1(Map(1 -> "a1", 2 -> "b1", 3 -> "c1", 4 -> "d1", 5 -> "e1")) :: Tuple1(Map(1 -> "a2", 2 -> "b2", 3 -> "c2", 4 -> "d2")) :: Tuple1(Map(1 -> "a3", 2 -> "b3", 3 -> "c3")) :: @@ -264,18 +266,22 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { .toDF("mapcol") .createOrReplaceTempView("mapdata") + // A data set containing lowercase column names and column values Seq((1, "a"), (2, "b"), (3, "c"), (4, "d")) .toDF("n", "l") .createOrReplaceTempView("lowerCaseData") + // A data set containing uppercase column names and column values Seq((1, "A"), (2, "B"), (3, "C"), (4, "D"), (5, "E"), (6, "F")) .toDF("N", "L") .createOrReplaceTempView("upperCaseData") + // A data set containing duplicate rows Seq((251, "val_251"), (86, "val_86"), (165,
"val_165"), (330, "val_330"), (165, "val_165")) .toDF("key", "value") .createOrReplaceTempView("duplicateRowData") + // A data set containing null session.sql( """ |CREATE OR REPLACE TEMPORARY VIEW nullData AS SELECT * FROM VALUES @@ -289,6 +295,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { |as nullData(key, value) """.stripMargin) + // A data set with logical partition columns ("ds" and "hr") Seq((251, "val_251", "2008-04-08", "11"), (251, "val_251", "2008-04-09", "11"), (251, "val_251", "2008-04-08", "12"), @@ -310,7 +317,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext { (165, "val_165", "2008-04-08", "12"), (165, "val_165", "2008-04-09", "12")) .toDF("key", "value", "ds", "hr") - .createOrReplaceTempView("srcpart") + .createOrReplaceTempView("partitionedData") } private val originalTimeZone = TimeZone.getDefault From 1969ca2130e3b430db63fce1120b5e08512534cc Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sun, 21 Aug 2016 23:39:17 -0700 Subject: [PATCH 26/27] fix. --- .../spark/sql/execution/command/views.scala | 8 ++- .../sql/hive/execution/SQLViewSuite.scala | 61 ++++++++++++++++++- 2 files changed, 67 insertions(+), 2 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index e397cfa058e24..19a1d8565ff61 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -105,7 +105,13 @@ case class CreateViewCommand( } val sessionState = sparkSession.sessionState - if (isTemporary) { + // 1) CREATE VIEW: create a temp view when users explicitly specify the keyword TEMPORARY; + // otherwise, create a permanent view no matter whether the temporary view + // with the same name exists or not. 
+ // 2) ALTER VIEW: alter the temporary view if the temp view exists; otherwise, try to alter + // the permanent view. Here, it follows the same resolution like DROP VIEW, + // since users are unable to specify the keyword TEMPORARY. + if (isTemporary || (replace && sessionState.catalog.isTemporaryTable(name))) { createTemporaryView(sparkSession, analyzedPlan) } else { // Adds default database for permanent table if it doesn't exist, so that tableExists() diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLViewSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLViewSuite.scala index 6a80664417911..14166f2532f04 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLViewSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLViewSuite.scala @@ -18,6 +18,8 @@ package org.apache.spark.sql.hive.execution import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} +import org.apache.spark.sql.catalyst.TableIdentifier +import org.apache.spark.sql.catalyst.catalog.CatalogTableType import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.test.SQLTestUtils @@ -204,10 +206,67 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } } - test("should allow CREATE permanent VIEW when a TEMPORARY VIEW with same name exists") { + test("ALTER VIEW: alter a temporary view when a permanent VIEW with same name exists") { + verifyAlterViewWithIdenticalName(isTempAlteredView = true) + } + + test("ALTER VIEW: alter a persistent view when a temp VIEW with same name exists") { + verifyAlterViewWithIdenticalName(isTempAlteredView = false) + } + + private def verifyAlterViewWithIdenticalName (isTempAlteredView: Boolean) = { + withView("testView", "default.testView") { + val catalog = spark.sessionState.catalog + val oldViewQuery = "SELECT id FROM jt" + val newViewQuery = "SELECT id, id1 FROM jt" + sql(s"CREATE VIEW 
default.testView AS $oldViewQuery") + sql(s"CREATE TEMPORARY VIEW testView AS $oldViewQuery") + if (isTempAlteredView) { + // When the database is not specified, we will first try to alter the temporary view + sql(s"ALTER VIEW testView AS $newViewQuery") + } else { + // When the database is specified, we will try to alter the permanent view, no matter + // whether the temporary view with the same name exists or not. + sql(s"ALTER VIEW default.testView AS $newViewQuery") + } + + val persistentView = catalog.getTableMetadata( + TableIdentifier(table = "testView", database = Some("default"))) + assert(persistentView.tableType == CatalogTableType.VIEW) + val tempView = catalog.getTableMetadata(TableIdentifier("testView")) + assert(tempView.tableType == CatalogTableType.VIEW) + assert(tempView.viewOriginalText.isEmpty) + + if (isTempAlteredView) { + // View Text of the persistent view default.testView is changed + assert(persistentView.viewOriginalText == Option(oldViewQuery)) + // temp view testView is changed + checkAnswer( + sql(newViewQuery), + sql("select * from testView")) + } else { + // View Text of the persistent view default.testView is changed + assert(persistentView.viewOriginalText == Option(newViewQuery)) + // temp view testView is not changed + checkAnswer( + sql(oldViewQuery), + sql("select * from testView")) + } + } + } + + test("CREATE VIEW: should allow CREATE permanent VIEW when a temp VIEW with same name exists") { withView("testView", "default.testView") { sql("CREATE TEMPORARY VIEW testView AS SELECT id FROM jt") sql("CREATE VIEW testView AS SELECT id FROM jt") + + // Both temporary and permanent view have been successfully created. 
+ val catalog = spark.sessionState.catalog + val persistentView = catalog.getTableMetadata( + TableIdentifier(table = "testView", database = Some("default"))) + assert(persistentView.tableType == CatalogTableType.VIEW) + val tempView = catalog.getTableMetadata(TableIdentifier("testView")) + assert(tempView.tableType == CatalogTableType.VIEW) } } From e2677da2d1dc9bcfbcdf0d998104586ada95fae2 Mon Sep 17 00:00:00 2001 From: gatorsmile Date: Sun, 21 Aug 2016 23:41:56 -0700 Subject: [PATCH 27/27] revert --- .../spark/sql/execution/command/views.scala | 8 +-- .../sql/hive/execution/SQLViewSuite.scala | 61 +------------------ 2 files changed, 2 insertions(+), 67 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala index 19a1d8565ff61..e397cfa058e24 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/views.scala @@ -105,13 +105,7 @@ case class CreateViewCommand( } val sessionState = sparkSession.sessionState - // 1) CREATE VIEW: create a temp view when users explicitly specify the keyword TEMPORARY; - // otherwise, create a permanent view no matter whether the temporary view - // with the same name exists or not. - // 2) ALTER VIEW: alter the temporary view if the temp view exists; otherwise, try to alter - // the permanent view. Here, it follows the same resolution like DROP VIEW, - // since users are unable to specify the keyword TEMPORARY. 
- if (isTemporary || (replace && sessionState.catalog.isTemporaryTable(name))) { + if (isTemporary) { createTemporaryView(sparkSession, analyzedPlan) } else { // Adds default database for permanent table if it doesn't exist, so that tableExists() diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLViewSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLViewSuite.scala index 14166f2532f04..6a80664417911 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLViewSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/SQLViewSuite.scala @@ -18,8 +18,6 @@ package org.apache.spark.sql.hive.execution import org.apache.spark.sql.{AnalysisException, QueryTest, Row, SaveMode} -import org.apache.spark.sql.catalyst.TableIdentifier -import org.apache.spark.sql.catalyst.catalog.CatalogTableType import org.apache.spark.sql.hive.test.TestHiveSingleton import org.apache.spark.sql.test.SQLTestUtils @@ -206,67 +204,10 @@ class SQLViewSuite extends QueryTest with SQLTestUtils with TestHiveSingleton { } } - test("ALTER VIEW: alter a temporary view when a permanent VIEW with same name exists") { - verifyAlterViewWithIdenticalName(isTempAlteredView = true) - } - - test("ALTER VIEW: alter a persistent view when a temp VIEW with same name exists") { - verifyAlterViewWithIdenticalName(isTempAlteredView = false) - } - - private def verifyAlterViewWithIdenticalName (isTempAlteredView: Boolean) = { - withView("testView", "default.testView") { - val catalog = spark.sessionState.catalog - val oldViewQuery = "SELECT id FROM jt" - val newViewQuery = "SELECT id, id1 FROM jt" - sql(s"CREATE VIEW default.testView AS $oldViewQuery") - sql(s"CREATE TEMPORARY VIEW testView AS $oldViewQuery") - if (isTempAlteredView) { - // When the database is not specified, we will first try to alter the temporary view - sql(s"ALTER VIEW testView AS $newViewQuery") - } else { - // When the database is specified, we will try to 
alter the permanent view, no matter - // whether the temporary view with the same name exists or not. - sql(s"ALTER VIEW default.testView AS $newViewQuery") - } - - val persistentView = catalog.getTableMetadata( - TableIdentifier(table = "testView", database = Some("default"))) - assert(persistentView.tableType == CatalogTableType.VIEW) - val tempView = catalog.getTableMetadata(TableIdentifier("testView")) - assert(tempView.tableType == CatalogTableType.VIEW) - assert(tempView.viewOriginalText.isEmpty) - - if (isTempAlteredView) { - // View Text of the persistent view default.testView is changed - assert(persistentView.viewOriginalText == Option(oldViewQuery)) - // temp view testView is changed - checkAnswer( - sql(newViewQuery), - sql("select * from testView")) - } else { - // View Text of the persistent view default.testView is changed - assert(persistentView.viewOriginalText == Option(newViewQuery)) - // temp view testView is not changed - checkAnswer( - sql(oldViewQuery), - sql("select * from testView")) - } - } - } - - test("CREATE VIEW: should allow CREATE permanent VIEW when a temp VIEW with same name exists") { + test("should allow CREATE permanent VIEW when a TEMPORARY VIEW with same name exists") { withView("testView", "default.testView") { sql("CREATE TEMPORARY VIEW testView AS SELECT id FROM jt") sql("CREATE VIEW testView AS SELECT id FROM jt") - - // Both temporary and permanent view have been successfully created. - val catalog = spark.sessionState.catalog - val persistentView = catalog.getTableMetadata( - TableIdentifier(table = "testView", database = Some("default"))) - assert(persistentView.tableType == CatalogTableType.VIEW) - val tempView = catalog.getTableMetadata(TableIdentifier("testView")) - assert(tempView.tableType == CatalogTableType.VIEW) } }