From 690c136bf1c1aabac0726b443077547dc47db53f Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 1 Apr 2019 19:25:38 -0700 Subject: [PATCH 1/3] [SPARK-26856][PYSPARK][FOLLOWUP] Fix wrong pattern for Kinesis assembly --- python/pyspark/testing/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pyspark/testing/utils.py b/python/pyspark/testing/utils.py index c6a528194f00..0f6875633daa 100644 --- a/python/pyspark/testing/utils.py +++ b/python/pyspark/testing/utils.py @@ -113,9 +113,9 @@ def search_jar(project_relative_path, jar_name_prefix): # Search jar in the project dir using the jar name_prefix for both sbt build and maven # build because the artifact jars are in different directories. sbt_build = glob.glob(os.path.join( - project_full_path, "target/scala-*/%s*.jar" % jar_name_prefix)) + project_full_path, "target/scala-*/%s-*.jar" % jar_name_prefix)) maven_build = glob.glob(os.path.join( - project_full_path, "target/%s*.jar" % jar_name_prefix)) + project_full_path, "target/%s_*.jar" % jar_name_prefix)) jar_paths = sbt_build + maven_build jars = [jar for jar in jar_paths if not jar.endswith(ignored_jar_suffixes)] From d738df7611857260fa67009a12b8e8cd95910b57 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 1 Apr 2019 21:31:45 -0700 Subject: [PATCH 2/3] Use two parameters. --- python/pyspark/sql/avro/functions.py | 2 +- python/pyspark/testing/streamingutils.py | 3 ++- python/pyspark/testing/utils.py | 6 +++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/python/pyspark/sql/avro/functions.py b/python/pyspark/sql/avro/functions.py index 81686df86679..e07b62528417 100644 --- a/python/pyspark/sql/avro/functions.py +++ b/python/pyspark/sql/avro/functions.py @@ -100,7 +100,7 @@ def _test(): import os import sys from pyspark.testing.utils import search_jar - avro_jar = search_jar("external/avro", "spark-avro") + avro_jar = search_jar("external/avro", "spark-avro", "spark-avro") if avro_jar is None: print( "Skipping all Avro Python tests as the optional Avro project was " diff --git a/python/pyspark/testing/streamingutils.py b/python/pyspark/testing/streamingutils.py index 3bed50721a98..4c27f8aad538 100644 --- a/python/pyspark/testing/streamingutils.py +++ b/python/pyspark/testing/streamingutils.py @@ -34,7 +34,8 @@ "was not set.") else: kinesis_asl_assembly_jar = search_jar("external/kinesis-asl-assembly", - "spark-streaming-kinesis-asl-assembly") + "spark-streaming-kinesis-asl-assembly-", + "spark-streaming-kinesis-asl-assembly_") if kinesis_asl_assembly_jar is None: kinesis_requirement_message = ( "Skipping all Kinesis Python tests as the optional Kinesis project was " diff --git a/python/pyspark/testing/utils.py b/python/pyspark/testing/utils.py index 0f6875633daa..00caed12069e 100644 --- a/python/pyspark/testing/utils.py +++ b/python/pyspark/testing/utils.py @@ -103,7 +103,7 @@ def close(self): pass -def search_jar(project_relative_path, jar_name_prefix): +def search_jar(project_relative_path, sbt_jar_name_prefix, mvn_jar_name_prefix): project_full_path = os.path.join( os.environ["SPARK_HOME"], project_relative_path) @@ -113,9 +113,9 @@ def search_jar(project_relative_path, jar_name_prefix): # Search jar in the project dir using the jar name_prefix for both sbt build and maven # build because the artifact jars are in different directories. sbt_build = glob.glob(os.path.join( - project_full_path, "target/scala-*/%s-*.jar" % jar_name_prefix)) + project_full_path, "target/scala-*/%s*.jar" % sbt_jar_name_prefix)) maven_build = glob.glob(os.path.join( - project_full_path, "target/%s_*.jar" % jar_name_prefix)) + project_full_path, "target/%s*.jar" % mvn_jar_name_prefix)) jar_paths = sbt_build + maven_build jars = [jar for jar in jar_paths if not jar.endswith(ignored_jar_suffixes)] From 0a03c16db0957a528746b445718ba602192a2406 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 1 Apr 2019 21:58:37 -0700 Subject: [PATCH 3/3] Add comments --- python/pyspark/testing/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/pyspark/testing/utils.py b/python/pyspark/testing/utils.py index 00caed12069e..2b42b898f9ed 100644 --- a/python/pyspark/testing/utils.py +++ b/python/pyspark/testing/utils.py @@ -104,6 +104,8 @@ def close(self): def search_jar(project_relative_path, sbt_jar_name_prefix, mvn_jar_name_prefix): + # Note that 'sbt_jar_name_prefix' and 'mvn_jar_name_prefix' are used since the prefix can + # vary for SBT or Maven specifically. See also SPARK-26856 project_full_path = os.path.join( os.environ["SPARK_HOME"], project_relative_path)