2 changes: 1 addition & 1 deletion .github/labeler.yml
@@ -225,7 +225,7 @@ DEPLOY:
CONNECT:
- changed-files:
- any-glob-to-any-file: [
'connect/**/*',
'sql/connect/**/*',
'connector/connect/**/*',
'python/pyspark/sql/**/connect/**/*',
'python/pyspark/ml/**/connect/**/*'
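Not part of the configuration itself: a small Scala sketch of what the updated globs are intended to capture, using `java.nio` glob matching as a rough stand-in for the labeler action's minimatch semantics (the sample paths are hypothetical).

```scala
import java.nio.file.{FileSystems, Paths}

// Approximate check: the new pattern should match files under the relocated
// sql/connect tree, while paths under the old top-level connect/ directory do not.
object LabelerGlobSketch extends App {
  val matcher = FileSystems.getDefault.getPathMatcher("glob:sql/connect/**")
  Seq("sql/connect/common/pom.xml", "connect/common/pom.xml")
    .foreach(p => println(s"$p -> ${matcher.matches(Paths.get(p))}"))
  // sql/connect/common/pom.xml -> true
  // connect/common/pom.xml -> false
}
```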
2 changes: 1 addition & 1 deletion .github/workflows/build_and_test.yml
@@ -606,7 +606,7 @@ jobs:
- name: Breaking change detection against branch-3.5
uses: bufbuild/buf-breaking-action@v1
with:
input: connect/common/src/main
input: sql/connect/common/src/main
against: 'https://github.com/apache/spark.git#branch=branch-3.5,subdir=connector/connect/common/src/main'
- name: Install Python 3.9
uses: actions/setup-python@v5
4 changes: 2 additions & 2 deletions .github/workflows/build_python_connect.yml
@@ -84,7 +84,7 @@ jobs:
# Start a Spark Connect server for local
PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \
--driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" \
--jars "`find connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"
--jars "`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"

# Remove Py4J and PySpark zipped library to make sure there is no JVM connection
mv python/lib lib.back
@@ -104,7 +104,7 @@ jobs:
PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \
--master "local-cluster[2, 4, 1024]" \
--driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" \
--jars "`find connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"
--jars "`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"

# Remove Py4J and PySpark zipped library to make sure there is no JVM connection
mv python/lib lib.back
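For orientation only: once a Connect server like the one started above is running, a client attaches to it over gRPC (this particular job then exercises the pure Python client with the JVM libraries removed). A minimal Scala sketch, assuming the server listens on its default port 15002:

```scala
import org.apache.spark.sql.SparkSession

object ConnectSmokeTest extends App {
  // Attach to the locally started Spark Connect server and run a trivial query.
  val spark = SparkSession.builder().remote("sc://localhost:15002").build()
  spark.range(5).collect().foreach(println)
}
```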
2 changes: 1 addition & 1 deletion .github/workflows/build_python_connect35.yml
@@ -87,7 +87,7 @@ jobs:
# Start a Spark Connect server for local
PYTHONPATH="python/lib/pyspark.zip:python/lib/py4j-0.10.9.7-src.zip:$PYTHONPATH" ./sbin/start-connect-server.sh \
--driver-java-options "-Dlog4j.configurationFile=file:$GITHUB_WORKSPACE/conf/log4j2.properties" \
--jars "`find connect/server/target -name spark-connect-*SNAPSHOT.jar`,`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"
--jars "`find connector/protobuf/target -name spark-protobuf-*SNAPSHOT.jar`,`find connector/avro/target -name spark-avro*SNAPSHOT.jar`"

# Checkout to branch-3.5 to use the tests in branch-3.5.
cd ..
2 changes: 1 addition & 1 deletion .github/workflows/maven_test.yml
@@ -194,7 +194,7 @@ jobs:
if [[ "$INCLUDED_TAGS" != "" ]]; then
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.include.tags="$INCLUDED_TAGS" test -fae
elif [[ "$MODULES_TO_TEST" == "connect" ]]; then
./build/mvn $MAVEN_CLI_OPTS -Dtest.exclude.tags="$EXCLUDED_TAGS" -Djava.version=${JAVA_VERSION/-ea} -pl connector/connect/client/jvm,connect/common,connect/server test -fae
./build/mvn $MAVEN_CLI_OPTS -Dtest.exclude.tags="$EXCLUDED_TAGS" -Djava.version=${JAVA_VERSION/-ea} -pl connector/connect/client/jvm,sql/connect/common,sql/connect/server test -fae
elif [[ "$EXCLUDED_TAGS" != "" ]]; then
./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.exclude.tags="$EXCLUDED_TAGS" test -fae
elif [[ "$MODULES_TO_TEST" == *"sql#hive-thriftserver"* ]]; then
@@ -308,6 +308,7 @@ class ClientE2ETestSuite
val testDataPath = java.nio.file.Paths
.get(
IntegrationTestUtils.sparkHome,
"sql",
"connect",
"common",
"src",
@@ -347,6 +348,7 @@ class ClientE2ETestSuite
val testDataPath = java.nio.file.Paths
.get(
IntegrationTestUtils.sparkHome,
"sql",
"connect",
"common",
"src",
@@ -377,6 +379,7 @@ class ClientE2ETestSuite
val testDataPath = java.nio.file.Paths
.get(
IntegrationTestUtils.sparkHome,
"sql",
"connect",
"common",
"src",
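Illustration only (with a hypothetical `/spark` home): the added `"sql"` segment simply shifts each resolved test-data root one directory deeper.

```scala
import java.nio.file.Paths

// Before vs. after the move of connect/ under sql/.
object TestDataPathSketch extends App {
  val before = Paths.get("/spark", "connect", "common", "src")
  val after  = Paths.get("/spark", "sql", "connect", "common", "src")
  println(s"$before -> $after") // /spark/connect/common/src -> /spark/sql/connect/common/src
}
```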
@@ -71,7 +71,7 @@ import org.apache.spark.util.SparkFileUtils
* compatibility.
*
* Note that the plan protos are used as the input for the `ProtoToParsedPlanTestSuite` in the
* `connect/server` module
* `sql/connect/server` module
*/
// scalastyle:on
class PlanGenerationTestSuite
@@ -88,7 +88,7 @@ class PlanGenerationTestSuite

protected val queryFilePath: Path = commonResourcePath.resolve("query-tests/queries")

// A relative path to /connect/server, used by `ProtoToParsedPlanTestSuite` to run
// A relative path to /sql/connect/server, used by `ProtoToParsedPlanTestSuite` to run
// with the datasource.
protected val testDataPath: Path = java.nio.file.Paths.get(
"../",
@@ -3325,10 +3325,10 @@ class PlanGenerationTestSuite
/* Protobuf functions */
// scalastyle:off line.size.limit
// If `common.desc` needs to be updated, execute the following command to regenerate it:
// 1. cd connect/common/src/main/protobuf/spark/connect
// 1. cd sql/connect/common/src/main/protobuf/spark/connect
// 2. protoc --include_imports --descriptor_set_out=../../../../test/resources/protobuf-tests/common.desc common.proto
// scalastyle:on line.size.limit
private val testDescFilePath: String = s"${IntegrationTestUtils.sparkHome}/connect/" +
private val testDescFilePath: String = s"${IntegrationTestUtils.sparkHome}/sql/connect/" +
"common/src/test/resources/protobuf-tests/common.desc"

// TODO(SPARK-45030): Re-enable this test when all Maven test scenarios succeed and there
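Aside: the `common.desc` file regenerated by the protoc steps above is a serialized `FileDescriptorSet`, so it can be inspected directly if needed. A minimal Scala sketch, assuming `protobuf-java` is on the classpath and the relative path is resolved from the repository root:

```scala
import java.nio.file.{Files, Paths}
import com.google.protobuf.DescriptorProtos.FileDescriptorSet
import scala.jdk.CollectionConverters._

object DescriptorSetSketch extends App {
  // Parse the descriptor set and list the .proto files it bundles.
  val descPath = Paths.get("sql/connect/common/src/test/resources/protobuf-tests/common.desc")
  val descriptorSet = FileDescriptorSet.parseFrom(Files.readAllBytes(descPath))
  descriptorSet.getFileList.asScala.foreach(f => println(f.getName))
}
```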
@@ -42,6 +42,7 @@ class ClientStreamingQuerySuite extends QueryTest with RemoteSparkSession with L
private val testDataPath = Paths
.get(
IntegrationTestUtils.sparkHome,
"sql",
"connect",
"common",
"src",
@@ -35,10 +35,17 @@ trait ConnectFunSuite extends AnyFunSuite { // scalastyle:ignore funsuite
}

protected def baseResourcePath: Path = {
getWorkspaceFilePath("connect", "client", "jvm", "src", "test", "resources").toAbsolutePath
getWorkspaceFilePath(
"sql",
"connect",
"client",
"jvm",
"src",
"test",
"resources").toAbsolutePath
}

protected def commonResourcePath: Path = {
getWorkspaceFilePath("connect", "common", "src", "test", "resources").toAbsolutePath
getWorkspaceFilePath("sql", "connect", "common", "src", "test", "resources").toAbsolutePath
}
}
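For context, suites mixing in this trait resolve golden files against these roots (e.g. `queryFilePath` in `PlanGenerationTestSuite` earlier in this diff). A rough sketch of that usage pattern, assuming `ConnectFunSuite` is available on the test classpath:

```scala
import java.nio.file.Path

// Hypothetical suite: golden files are looked up under the relocated sql/connect tree.
class GoldenFilePathSuite extends ConnectFunSuite {
  private val queryFilePath: Path = commonResourcePath.resolve("query-tests/queries")

  test("golden file root lives under the connect resource tree") {
    assert(queryFilePath.toString.contains("connect"))
  }
}
```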
@@ -60,7 +60,7 @@ object SparkConnectServerUtils {
private lazy val sparkConnect: java.lang.Process = {
debug("Starting the Spark Connect Server...")
val connectJar =
findJar("connect/server", "spark-connect-assembly", "spark-connect").getCanonicalPath
findJar("sql/connect/server", "spark-connect-assembly", "spark-connect").getCanonicalPath

val command = Seq.newBuilder[String]
command += "bin/spark-submit"
2 changes: 1 addition & 1 deletion dev/connect-check-protos.py
@@ -75,7 +75,7 @@ def check_connect_protos():
else:
fail(
"Generated files for pyspark-connect are out of sync! "
"If you have touched files under connect/common/src/main/protobuf/, "
"If you have touched files under sql/connect/common/src/main/protobuf/, "
"please run ./dev/connect-gen-protos.sh. "
"If you haven't touched any file above, please rebase your PR against main branch."
)
2 changes: 1 addition & 1 deletion dev/connect-gen-protos.sh
@@ -36,7 +36,7 @@ if [[ $# -eq 1 ]]; then
OUTPUT_PATH=$1
fi

pushd connect/common/src/main
pushd sql/connect/common/src/main

LICENSE=$(cat <<'EOF'
#
6 changes: 3 additions & 3 deletions dev/lint-scala
@@ -29,16 +29,16 @@ ERRORS=$(./build/mvn \
-Dscalafmt.skip=false \
-Dscalafmt.validateOnly=true \
-Dscalafmt.changedOnly=false \
-pl connect/common \
-pl connect/server \
-pl sql/connect/common \
-pl sql/connect/server \
-pl connector/connect/client/jvm \
2>&1 | grep -e "Unformatted files found" \
)

if test ! -z "$ERRORS"; then
echo -e "The scalafmt check failed on connect or connector/connect at following occurrences:\n\n$ERRORS\n"
echo "Before submitting your change, please make sure to format your code using the following command:"
echo "./build/mvn scalafmt:format -Dscalafmt.skip=false -Dscalafmt.validateOnly=false -Dscalafmt.changedOnly=false -pl connect/common -pl connect/server -pl connector/connect/client/jvm"
echo "./build/mvn scalafmt:format -Dscalafmt.skip=false -Dscalafmt.validateOnly=false -Dscalafmt.changedOnly=false -pl sql/connect/common -pl sql/connect/server -pl connector/connect/client/jvm"
exit 1
else
echo -e "Scalafmt checks passed."
2 changes: 1 addition & 1 deletion dev/protobuf-breaking-changes-check.sh
@@ -33,7 +33,7 @@ if [[ $# -eq 1 ]]; then
BRANCH=$1
fi

pushd connect/common/src/main &&
pushd sql/connect/common/src/main &&
echo "Start protobuf breaking changes checking against $BRANCH" &&
buf breaking --against "https://github.com/apache/spark.git#branch=$BRANCH,subdir=connector/connect/common/src/main" &&
echo "Finsh protobuf breaking changes checking: SUCCESS"
2 changes: 1 addition & 1 deletion dev/sparktestsupport/modules.py
@@ -324,7 +324,7 @@ def __hash__(self):
name="connect",
dependencies=[hive, avro, protobuf],
source_file_regexes=[
"connect",
"sql/connect",
"connector/connect",
],
sbt_test_goals=[
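Roughly how these entries behave, sketched in Scala for illustration (the actual picker in `dev/sparktestsupport` matches the regexes against changed file paths in Python); the file names below are hypothetical.

```scala
// Both the new sql/connect location and the existing connector/connect location
// should route changed files to the "connect" test module.
object ModuleRoutingSketch extends App {
  val sourceFileRegexes = Seq("sql/connect", "connector/connect").map(_.r)
  val changedFiles = Seq("sql/connect/server/pom.xml", "core/src/main/scala/Example.scala")
  val routedToConnect = changedFiles.filter(f => sourceFileRegexes.exists(_.findFirstIn(f).isDefined))
  println(routedToConnect) // List(sql/connect/server/pom.xml)
}
```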
4 changes: 2 additions & 2 deletions docs/spark-connect-overview.md
@@ -279,7 +279,7 @@ The customizations may also be passed in through CLI arguments as shown below:
spark-connect-repl --host myhost.com --port 443 --token ABCDEFG
{% endhighlight %}

The supported list of CLI arguments may be found [here](https://github.com/apache/spark/blob/master/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClientParser.scala#L48).
The supported list of CLI arguments may be found [here](https://github.com/apache/spark/blob/master/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/SparkConnectClientParser.scala).

#### Configure programmatically with a connection string

@@ -364,7 +364,7 @@ val spark = SparkSession.builder().remote("sc://localhost").build()


**Note**: Operations that reference User Defined Code such as UDFs, filter, map, etc require a
[ClassFinder](https://github.com/apache/spark/blob/bb41cd889efdd0602385e70b4c8f1c93740db332/connect/common/src/main/scala/org/apache/spark/sql/connect/client/ClassFinder.scala#L26)
[ClassFinder](https://github.com/apache/spark/blob/master/sql/connect/common/src/main/scala/org/apache/spark/sql/connect/client/ClassFinder.scala)
to be registered to pickup and upload any required classfiles. Also, any JAR dependencies must be uploaded to the server using `SparkSession#AddArtifact`.

Example:
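The document's own example is truncated in this view; separately from it, here is a rough sketch of the connection and dependency-upload flow the note describes, assuming the Scala client's `remote()` builder (shown in the doc) and its `addArtifact` method. The host, token, and JAR path are placeholders.

```scala
import org.apache.spark.sql.SparkSession

object RemoteWithArtifactsSketch extends App {
  // The connection string carries host, port, and auth token for the Connect endpoint.
  val spark = SparkSession.builder()
    .remote("sc://myhost.com:443/;token=ABCDEFG;use_ssl=true")
    .build()

  // Upload a JAR so server-side execution of UDFs can resolve user classes.
  spark.addArtifact("target/my-udfs.jar")
}
```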
@@ -150,8 +150,8 @@ List<String> buildClassPath(String appClassPath) throws IOException {
"common/sketch",
"common/tags",
"common/unsafe",
"connect/common",
"connect/server",
"sql/connect/common",
"sql/connect/server",
"core",
"examples",
"graphx",
@@ -174,7 +174,9 @@ List<String> buildClassPath(String appClassPath) throws IOException {
for (String project : projects) {
// Do not use locally compiled class files for Spark server because it should use shaded
// dependencies.
if (project.equals("connect/server") || project.equals("connect/common")) continue;
if (project.equals("sql/connect/server") || project.equals("sql/connect/common")) {
continue;
}
addToClassPath(cp, String.format("%s/%s/target/scala-%s/classes", sparkHome, project,
scala));
}
4 changes: 2 additions & 2 deletions pom.xml
@@ -94,12 +94,12 @@
<module>sql/catalyst</module>
<module>sql/core</module>
<module>sql/hive</module>
<module>sql/connect/server</module>
<module>sql/connect/common</module>
<module>assembly</module>
<module>examples</module>
<module>repl</module>
<module>launcher</module>
<module>connect/server</module>
<module>connect/common</module>
<module>connector/kafka-0-10-token-provider</module>
<module>connector/kafka-0-10</module>
<module>connector/kafka-0-10-assembly</module>
2 changes: 1 addition & 1 deletion python/docs/source/development/testing.rst
@@ -69,7 +69,7 @@ Running Tests for Python Client
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

In order to test the changes in Protobuf definitions, for example, at
`spark/connect/common/src/main/protobuf/spark/connect <https://github.com/apache/spark/tree/master/connect/common/src/main/protobuf/spark/connect>`_,
`spark/sql/connect/common/src/main/protobuf/spark/connect <https://github.com/apache/spark/tree/master/sql/connect/common/src/main/protobuf/spark/connect>`_,
you should regenerate Python Protobuf client first by running ``dev/connect-gen-protos.sh``.


File renamed without changes.
2 changes: 1 addition & 1 deletion connect/common/pom.xml → sql/connect/common/pom.xml
@@ -23,7 +23,7 @@
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.13</artifactId>
<version>4.0.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
<relativePath>../../../pom.xml</relativePath>
</parent>

<artifactId>spark-connect-common_2.13</artifactId>