From 93371d8967916c8432198426ba281ec56c07c532 Mon Sep 17 00:00:00 2001
From: zhangxinyu1 <342689740@qq.com>
Date: Thu, 22 Sep 2016 17:41:32 +0800
Subject: [PATCH 1/3] Add HttpStreamSink. Streaming query results can be sunk
 to an HTTP server

---
 ...pache.spark.sql.sources.DataSourceRegister |   1 +
 .../execution/streaming/HttpStreamSink.scala  | 105 ++++++++++++++++++
 2 files changed, 106 insertions(+)
 create mode 100644 sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HttpStreamSink.scala

diff --git a/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister b/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
index 27d32b5dca431..48a61c3f6745d 100644
--- a/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
+++ b/sql/core/src/main/resources/META-INF/services/org.apache.spark.sql.sources.DataSourceRegister
@@ -5,3 +5,4 @@ org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat
 org.apache.spark.sql.execution.datasources.text.TextFileFormat
 org.apache.spark.sql.execution.streaming.ConsoleSinkProvider
 org.apache.spark.sql.execution.streaming.TextSocketSourceProvider
+org.apache.spark.sql.execution.streaming.HttpStreamSink
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HttpStreamSink.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HttpStreamSink.scala
new file mode 100644
index 0000000000000..8d8fdc75541bc
--- /dev/null
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HttpStreamSink.scala
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.streaming
+
+import java.io.{BufferedReader, InputStreamReader, PrintWriter}
+import java.net.{UnknownHostException, URL, URLConnection}
+
+import org.apache.spark.internal.Logging
+import org.apache.spark.sql._
+import org.apache.spark.sql.sources.{DataSourceRegister, StreamSinkProvider}
+import org.apache.spark.sql.streaming.OutputMode
+import org.apache.spark.util.Utils
+
+class HttpSink(options: Map[String, String]) extends Sink with Logging {
+  override def addBatch(batchId: Long, data: DataFrame): Unit = synchronized {
+    val dataFormat: HttpDataFormat = {
+      val className = options.getOrElse("format.class",
+        "org.apache.spark.sql.execution.streaming.HttpDataToStringDefault")
+      createObject[HttpDataFormat](className)
+    }
+    data.collect().foreach(dataSeq => {
+      post(dataFormat.format(dataSeq.toSeq))
+    })
+  }
+
+  private def createObject[T<:AnyRef](className: String, args: AnyRef*): T = {
+    val klass = Utils.classForName(className).asInstanceOf[Class[T]]
+    val constructor = klass.getConstructor(args.map(_.getClass): _*)
+    constructor.newInstance(args: _*)
+  }
+
+  private def post(param: String): Unit = {
+    val url: URL = new URL(options.get("url").get)
+    val connection: URLConnection = url.openConnection
+    connection.setDoInput(true)
+    connection.setDoOutput(true)
+    val writer = new PrintWriter(connection.getOutputStream)
+    try {
+      writer.print(param)
+      writer.flush()
+    } catch {
+      case cause: Throwable => {
+        logError("Post http request error: ", cause)
+      }
+    } finally {
+      writer.close()
+    }
+    val reader = new BufferedReader(new InputStreamReader(connection.getInputStream))
+    try {
+      val it = reader.lines().iterator()
+      var lines: String = ""
+      while (it.hasNext()) {
+        lines += it.next()
+      }
+      logTrace("Http request post result: " + lines)
+    } catch {
+      case cause: Throwable => {
+        logError("Read http result error: ", cause)
+      }
+    } finally {
+      reader.close()
+    }
+  }
+}
+
+trait HttpDataFormat{
+  def format(data: Seq[Any]): String
+}
+
+class HttpDataToStringDefault extends HttpDataFormat {
+  def format(data: Seq[Any]) : String = {
+    return data.mkString(", ")
+  }
+}
+
+class HttpStreamSink extends StreamSinkProvider with DataSourceRegister{
+  def createSink(
+      sqlContext: SQLContext,
+      parameters: Map[String, String],
+      partitionColumns: Seq[String],
+      outputMode: OutputMode): Sink = {
+    if (!parameters.contains("url")) {
+      throw new AnalysisException("Http url should be set: .option(\"url\", \"...\").")
+    }
+    new HttpSink(parameters)
+  }
+
+  def shortName(): String = "http"
+}
+

From d046e85fb6d0d68259abdf123032c304926d159f Mon Sep 17 00:00:00 2001
From: zhangxinyu1 <342689740@qq.com>
Date: Mon, 26 Sep 2016 18:54:51 +0800
Subject: [PATCH 2/3] 1. Remove trait HttpDataFormat; verify instead that the
 output DataFrame has a single string column.
 2. Add HttpStreamSinkSuite for testing

---
 sql/core/pom.xml                              |  6 ++
 .../execution/streaming/HttpStreamSink.scala  | 90 +++++++++----------
 .../streaming/HttpStreamSinkSuite.scala       | 38 ++++++++
 3 files changed, 85 insertions(+), 49 deletions(-)
 create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HttpStreamSinkSuite.scala

diff --git a/sql/core/pom.xml b/sql/core/pom.xml
index b2752638bebd5..7ccae20f993d9 100644
--- a/sql/core/pom.xml
+++ b/sql/core/pom.xml
@@ -126,6 +126,12 @@
     <dependency>
      <groupId>org.apache.xbean</groupId>
      <artifactId>xbean-asm5-shaded</artifactId>
      <scope>test</scope>
    </dependency>
+    <dependency>
+      <groupId>com.sparkjava</groupId>
+      <artifactId>spark-core</artifactId>
+      <version>2.5</version>
+      <scope>test</scope>
+    </dependency>
   </dependencies>
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HttpStreamSink.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HttpStreamSink.scala
index 8d8fdc75541bc..d9e8f642a2149 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HttpStreamSink.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/streaming/HttpStreamSink.scala
@@ -18,45 +18,65 @@
 package org.apache.spark.sql.execution.streaming
 
 import java.io.{BufferedReader, InputStreamReader, PrintWriter}
-import java.net.{UnknownHostException, URL, URLConnection}
+import java.net.{URL, URLConnection}
 
 import org.apache.spark.internal.Logging
 import org.apache.spark.sql._
 import org.apache.spark.sql.sources.{DataSourceRegister, StreamSinkProvider}
 import org.apache.spark.sql.streaming.OutputMode
-import org.apache.spark.util.Utils
+import org.apache.spark.sql.types.{StringType, StructType}
+
+class HttpStreamSink extends StreamSinkProvider with DataSourceRegister{
+  override def createSink(
+      sqlContext: SQLContext,
+      parameters: Map[String, String],
+      partitionColumns: Seq[String],
+      outputMode: OutputMode): Sink = {
+    if (!parameters.contains("url")) {
+      throw new AnalysisException("Http url should be set: .option(\"url\", \"...\").")
+    }
+    new HttpSink(parameters)
+  }
+
+  override def shortName(): String = "http"
+}
+
+/**
+ * A sink that outputs streaming query results by sending HTTP POST requests. Each [[Row]]
+ * in a batch is posted to the configured HTTP URL.
+ * Each [[Row]] in the batch must have exactly one column, and that column's type must be
+ * [[StringType]].
+ */
 class HttpSink(options: Map[String, String]) extends Sink with Logging {
   override def addBatch(batchId: Long, data: DataFrame): Unit = synchronized {
-    val dataFormat: HttpDataFormat = {
-      val className = options.getOrElse("format.class",
-        "org.apache.spark.sql.execution.streaming.HttpDataToStringDefault")
-      createObject[HttpDataFormat](className)
-    }
+    verifySchema(data.schema)
     data.collect().foreach(dataSeq => {
-      post(dataFormat.format(dataSeq.toSeq))
+      post(dataSeq.get(0).toString)
     })
   }
-
-  private def createObject[T<:AnyRef](className: String, args: AnyRef*): T = {
-    val klass = Utils.classForName(className).asInstanceOf[Class[T]]
-    val constructor = klass.getConstructor(args.map(_.getClass): _*)
-    constructor.newInstance(args: _*)
+  private def verifySchema(schema: StructType): Unit = {
+    if (schema.size != 1) {
+      throw new AnalysisException(
+        s"Http data sink supports only a single column, and you have ${schema.size} columns.")
+    }
+    val tpe = schema(0).dataType
+    if (tpe != StringType) {
+      throw new AnalysisException(
+        s"Http data sink supports only a string column, but you have ${tpe.simpleString}.")
+    }
   }
-
-  private def post(param: String): Unit = {
+  private def post(data: String): Unit = {
     val url: URL = new URL(options.get("url").get)
     val connection: URLConnection = url.openConnection
     connection.setDoInput(true)
     connection.setDoOutput(true)
     val writer = new PrintWriter(connection.getOutputStream)
     try {
-      writer.print(param)
+      writer.print(data)
       writer.flush()
     } catch {
-      case cause: Throwable => {
-        logError("Post http request error: ", cause)
-      }
+      case cause: Throwable => logError("Post http request error: ", cause)
     } finally {
       writer.close()
     }
@@ -67,39 +87,11 @@ class HttpSink(options: Map[String, String]) extends Sink with Logging {
       while (it.hasNext()) {
         lines += it.next()
       }
-      logTrace("Http request post result: " + lines)
+      logTrace(s"Http request post result: ${lines}.")
     } catch {
-      case cause: Throwable => {
-        logError("Read http result error: ", cause)
-      }
+      case cause: Throwable => logError("Read http result error: ", cause)
     } finally {
       reader.close()
     }
   }
 }
-
-trait HttpDataFormat{
-  def format(data: Seq[Any]): String
-}
-
-class HttpDataToStringDefault extends HttpDataFormat {
-  def format(data: Seq[Any]) : String = {
-    return data.mkString(", ")
-  }
-}
-
-class HttpStreamSink extends StreamSinkProvider with DataSourceRegister{
-  def createSink(
-      sqlContext: SQLContext,
-      parameters: Map[String, String],
-      partitionColumns: Seq[String],
-      outputMode: OutputMode): Sink = {
-    if (!parameters.contains("url")) {
-      throw new AnalysisException("Http url should be set: .option(\"url\", \"...\").")
-    }
-    new HttpSink(parameters)
-  }
-
-  def shortName(): String = "http"
-}
-
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HttpStreamSinkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HttpStreamSinkSuite.scala
new file mode 100644
index 0000000000000..edd21f4cdcc8b
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HttpStreamSinkSuite.scala
@@ -0,0 +1,38 @@
+package org.apache.spark.sql.execution.streaming
+
+import java.util
+
+import org.apache.spark.sql.Row
+import org.apache.spark.sql.types.{StructField, StructType, StringType}
+import spark.{Route, Spark, Request, Response}
+import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
+import org.apache.spark.sql.streaming.StreamTest
+import org.apache.spark.sql.test.SharedSQLContext
+import org.scalatest.BeforeAndAfter
+
+class HttpStreamSinkSuite extends StreamTest with SharedSQLContext with BeforeAndAfter {
+  import testImplicits._
+  after {
+    sqlContext.streams.active.foreach(_.stop())
+  }
+  test("http sink") {
+    var output: String = ""
+    Spark.port(3775)
+    Spark.post("/welcome", new Route {
+      override def handle(req: Request, resp: Response): Object = {
+        output = req.body()
+        resp.status(200)
+        "welcome"
+      }
+    })
+    val input = MemoryStream[String]
+    val query = input.toDF().writeStream
+      .outputMode("append")
+      .format("http")
+      .option("url", "http://localhost:3775/welcome")
+      .start()
+    input.addData("Jerry")
+    query.processAllAvailable()
+    assert(output === "Jerry")
+  }
+}

From be748a1f74cae7464462c52594bc00ed14d37f98 Mon Sep 17 00:00:00 2001
From: zhangxinyu1 <342689740@qq.com>
Date: Mon, 26 Sep 2016 19:05:27 +0800
Subject: [PATCH 3/3] Modify code style of HttpStreamSinkSuite

---
 .../spark/sql/execution/streaming/HttpStreamSinkSuite.scala | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HttpStreamSinkSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HttpStreamSinkSuite.scala
index edd21f4cdcc8b..90ceca7d441a7 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HttpStreamSinkSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/streaming/HttpStreamSinkSuite.scala
@@ -1,11 +1,7 @@
 package org.apache.spark.sql.execution.streaming
 
-import java.util
+import spark.{Response, Request, Route, Spark}
 
-import org.apache.spark.sql.Row
-import org.apache.spark.sql.types.{StructField, StructType, StringType}
-import spark.{Route, Spark, Request, Response}
-import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
 import org.apache.spark.sql.streaming.StreamTest
 import org.apache.spark.sql.test.SharedSQLContext
 import org.scalatest.BeforeAndAfter
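
Usage note: a minimal sketch of how a streaming query might drive this sink,
assuming a Spark 2.0-style SparkSession and the options introduced in these
patches. The socket source, host/port, checkpoint path, and the endpoint URL
http://localhost:3775/welcome are illustrative placeholders, not values taken
from the patches themselves.

    import org.apache.spark.sql.SparkSession

    object HttpSinkExample {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
          .master("local[2]")
          .appName("http-sink-example")
          .getOrCreate()

        // Any streaming source works; the built-in socket source keeps this small.
        val lines = spark.readStream
          .format("socket")
          .option("host", "localhost")
          .option("port", "9999")
          .load()

        // HttpSink.addBatch verifies the schema: exactly one column of StringType.
        // Collapse the frame to a single string column before writing.
        val payload = lines.selectExpr("CAST(value AS STRING) AS payload")

        val query = payload.writeStream
          .outputMode("append")
          .format("http") // resolved through DataSourceRegister to HttpStreamSink
          .option("url", "http://localhost:3775/welcome") // required; rows are POSTed here
          .option("checkpointLocation", "/tmp/http-sink-checkpoint")
          .start()

        query.awaitTermination()
      }
    }

Note that addBatch collects each micro-batch to the driver and issues one HTTP
POST per row, so this sink is best suited to low-volume result streams.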