diff --git a/docs/ml-features.md b/docs/ml-features.md index 142afac2f3f9..70cbfe74e7f7 100644 --- a/docs/ml-features.md +++ b/docs/ml-features.md @@ -37,7 +37,7 @@ In the following code segment, we start with a set of sentences. We split each Refer to the [HashingTF Scala docs](api/scala/index.html#org.apache.spark.ml.feature.HashingTF) and the [IDF Scala docs](api/scala/index.html#org.apache.spark.ml.feature.IDF) for more details on the API. -{% include_example scala/org/apache/spark/examples/ml/TfIdfExample.scala %} +{% include_example scala/org/apache/spark/examples/ml/HashingTF.scala %}
@@ -45,7 +45,7 @@ the [IDF Scala docs](api/scala/index.html#org.apache.spark.ml.feature.IDF) for m Refer to the [HashingTF Java docs](api/java/org/apache/spark/ml/feature/HashingTF.html) and the [IDF Java docs](api/java/org/apache/spark/ml/feature/IDF.html) for more details on the API. -{% include_example java/org/apache/spark/examples/ml/JavaTfIdfExample.java %} +{% include_example java/org/apache/spark/examples/ml/JavaHashingTF.java %}
@@ -53,7 +53,24 @@ Refer to the [HashingTF Java docs](api/java/org/apache/spark/ml/feature/HashingT Refer to the [HashingTF Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.HashingTF) and the [IDF Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.IDF) for more details on the API. -{% include_example python/ml/tf_idf_example.py %} +{% highlight python %} +from pyspark.ml.feature import HashingTF, IDF, Tokenizer + +sentenceData = sqlContext.createDataFrame([ + (0, "Hi I heard about Spark"), + (0, "I wish Java could use case classes"), + (1, "Logistic regression models are neat") +], ["label", "sentence"]) +tokenizer = Tokenizer(inputCol="sentence", outputCol="words") +wordsData = tokenizer.transform(sentenceData) +hashingTF = HashingTF(inputCol="words", outputCol="rawFeatures", numFeatures=20) +featurizedData = hashingTF.transform(wordsData) +idf = IDF(inputCol="rawFeatures", outputCol="features") +idfModel = idf.fit(featurizedData) +rescaledData = idfModel.transform(featurizedData) +for features_label in rescaledData.select("features", "label").take(3): + print(features_label) +{% endhighlight %}
@@ -74,7 +91,26 @@ In the following code segment, we start with a set of documents, each of which i Refer to the [Word2Vec Scala docs](api/scala/index.html#org.apache.spark.ml.feature.Word2Vec) for more details on the API. -{% include_example scala/org/apache/spark/examples/ml/Word2VecExample.scala %} +{% highlight scala %} +import org.apache.spark.ml.feature.Word2Vec + +// Input data: Each row is a bag of words from a sentence or document. +val documentDF = sqlContext.createDataFrame(Seq( + "Hi I heard about Spark".split(" "), + "I wish Java could use case classes".split(" "), + "Logistic regression models are neat".split(" ") +).map(Tuple1.apply)).toDF("text") + +// Learn a mapping from words to Vectors. +val word2Vec = new Word2Vec() + .setInputCol("text") + .setOutputCol("result") + .setVectorSize(3) + .setMinCount(0) +val model = word2Vec.fit(documentDF) +val result = model.transform(documentDF) +result.select("result").take(3).foreach(println) +{% endhighlight %}
@@ -82,7 +118,43 @@ for more details on the API.
 
 Refer to the [Word2Vec Java docs](api/java/org/apache/spark/ml/feature/Word2Vec.html)
 for more details on the API.
 
-{% include_example java/org/apache/spark/examples/ml/JavaWord2VecExample.java %}
+{% highlight java %}
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.ml.feature.Word2Vec;
+import org.apache.spark.ml.feature.Word2VecModel;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.types.*;
+
+JavaSparkContext jsc = ...
+SQLContext sqlContext = ...
+
+// Input data: Each row is a bag of words from a sentence or document.
+JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+  RowFactory.create(Arrays.asList("Hi I heard about Spark".split(" "))),
+  RowFactory.create(Arrays.asList("I wish Java could use case classes".split(" "))),
+  RowFactory.create(Arrays.asList("Logistic regression models are neat".split(" ")))
+));
+StructType schema = new StructType(new StructField[]{
+  new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty())
+});
+DataFrame documentDF = sqlContext.createDataFrame(jrdd, schema);
+
+// Learn a mapping from words to Vectors.
+Word2Vec word2Vec = new Word2Vec()
+  .setInputCol("text")
+  .setOutputCol("result")
+  .setVectorSize(3)
+  .setMinCount(0);
+Word2VecModel model = word2Vec.fit(documentDF);
+DataFrame result = model.transform(documentDF);
+for (Row r : result.select("result").take(3)) {
+  System.out.println(r);
+}
+{% endhighlight %}
@@ -90,7 +162,22 @@ for more details on the API. Refer to the [Word2Vec Python docs](api/python/pyspark.ml.html#pyspark.ml.feature.Word2Vec) for more details on the API. -{% include_example python/ml/word2vec_example.py %} +{% highlight python %} +from pyspark.ml.feature import Word2Vec + +# Input data: Each row is a bag of words from a sentence or document. +documentDF = sqlContext.createDataFrame([ + ("Hi I heard about Spark".split(" "), ), + ("I wish Java could use case classes".split(" "), ), + ("Logistic regression models are neat".split(" "), ) +], ["text"]) +# Learn a mapping from words to Vectors. +word2Vec = Word2Vec(vectorSize=3, minCount=0, inputCol="text", outputCol="result") +model = word2Vec.fit(documentDF) +result = model.transform(documentDF) +for feature in result.select("result").take(3): + print(feature) +{% endhighlight %}
@@ -138,7 +225,30 @@ Refer to the [CountVectorizer Scala docs](api/scala/index.html#org.apache.spark.
 and the [CountVectorizerModel Scala docs](api/scala/index.html#org.apache.spark.ml.feature.CountVectorizerModel)
 for more details on the API.
 
-{% include_example scala/org/apache/spark/examples/ml/CountVectorizerExample.scala %}
+{% highlight scala %}
+import org.apache.spark.ml.feature.{CountVectorizer, CountVectorizerModel}
+
+val df = sqlContext.createDataFrame(Seq(
+  (0, Array("a", "b", "c")),
+  (1, Array("a", "b", "b", "c", "a"))
+)).toDF("id", "words")
+
+// fit a CountVectorizerModel from the corpus
+val cvModel: CountVectorizerModel = new CountVectorizer()
+  .setInputCol("words")
+  .setOutputCol("features")
+  .setVocabSize(3)
+  .setMinDF(2) // a term must appear in at least 2 documents to be included in the vocabulary
+  .fit(df)
+
+// alternatively, define CountVectorizerModel with a-priori vocabulary
+val cvm = new CountVectorizerModel(Array("a", "b", "c"))
+  .setInputCol("words")
+  .setOutputCol("features")
+
+cvModel.transform(df).select("features").show()
+{% endhighlight %}
@@ -147,7 +257,40 @@ Refer to the [CountVectorizer Java docs](api/java/org/apache/spark/ml/feature/Co
 and the [CountVectorizerModel Java docs](api/java/org/apache/spark/ml/feature/CountVectorizerModel.html)
 for more details on the API.
 
-{% include_example java/org/apache/spark/examples/ml/JavaCountVectorizerExample.java %}
+{% highlight java %}
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.ml.feature.CountVectorizer;
+import org.apache.spark.ml.feature.CountVectorizerModel;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.*;
+
+// Input data: Each row is a bag of words from a sentence or document.
+JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+  RowFactory.create(Arrays.asList("a", "b", "c")),
+  RowFactory.create(Arrays.asList("a", "b", "b", "c", "a"))
+));
+StructType schema = new StructType(new StructField[]{
+  new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty())
+});
+DataFrame df = sqlContext.createDataFrame(jrdd, schema);
+
+// fit a CountVectorizerModel from the corpus
+CountVectorizerModel cvModel = new CountVectorizer()
+  .setInputCol("text")
+  .setOutputCol("feature")
+  .setVocabSize(3)
+  .setMinDF(2) // a term must appear in at least 2 documents to be included in the vocabulary
+  .fit(df);
+
+// alternatively, define CountVectorizerModel with a-priori vocabulary
+CountVectorizerModel cvm = new CountVectorizerModel(new String[]{"a", "b", "c"})
+  .setInputCol("text")
+  .setOutputCol("feature");
+
+cvModel.transform(df).show();
+{% endhighlight %}
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizer.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizer.java new file mode 100644 index 000000000000..e3e5a14f5d02 --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaBinarizer.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml; + +import java.util.Arrays; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.feature.Binarizer; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +/** + * An example demonstrating a Binarizer. + * Run with + *
+ * <pre>
+ * bin/run-example ml.JavaBinarizer
+ * </pre>
+ */
+public class JavaBinarizer {
+
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaBinarizer");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext jsql = new SQLContext(jsc);
+
+    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+      RowFactory.create(0, 0.1),
+      RowFactory.create(1, 0.8),
+      RowFactory.create(2, 0.2)
+    ));
+    StructType schema = new StructType(new StructField[]{
+      new StructField("label", DataTypes.IntegerType, false, Metadata.empty()),
+      new StructField("feature", DataTypes.DoubleType, false, Metadata.empty())
+    });
+    DataFrame continuousDataFrame = jsql.createDataFrame(jrdd, schema);
+    Binarizer binarizer = new Binarizer()
+      .setInputCol("feature")
+      .setOutputCol("binarized_feature")
+      .setThreshold(0.5);
+    DataFrame binarizedDataFrame = binarizer.transform(continuousDataFrame);
+    DataFrame binarizedFeatures = binarizedDataFrame.select("binarized_feature");
+    for (Row r : binarizedFeatures.collect()) {
+      Double binarizedValue = r.getDouble(0);
+      System.out.println(binarizedValue);
+    }
+
+    jsc.stop();
+  }
+}
\ No newline at end of file
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizer.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizer.java
new file mode 100644
index 000000000000..f329e2d1caf9
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaBucketizer.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import java.util.Arrays;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.ml.feature.Bucketizer;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+/**
+ * An example demonstrating a Bucketizer.
+ * Run with
+ * <pre>
+ * bin/run-example ml.JavaBucketizer
+ * </pre>
+ */ +public class JavaBucketizer { + + public static void main(String[] args) { + SparkConf conf = new SparkConf().setAppName("JavaBucketizer"); + JavaSparkContext jsc = new JavaSparkContext(conf); + SQLContext jsql = new SQLContext(jsc); + + double[] splits = {Double.NEGATIVE_INFINITY, -0.5, 0.0, 0.5, Double.POSITIVE_INFINITY}; + + JavaRDD data = jsc.parallelize(Arrays.asList( + RowFactory.create(-0.5), + RowFactory.create(-0.3), + RowFactory.create(0.0), + RowFactory.create(0.2) + )); + StructType schema = new StructType(new StructField[]{ + new StructField("features", DataTypes.DoubleType, false, Metadata.empty()) + }); + DataFrame dataFrame = jsql.createDataFrame(data, schema); + + Bucketizer bucketizer = new Bucketizer() + .setInputCol("features") + .setOutputCol("bucketedFeatures") + .setSplits(splits); + + // Transform original data into its bucket index. + DataFrame bucketedData = bucketizer.transform(dataFrame); + + } +} + + diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaDCT.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaDCT.java new file mode 100644 index 000000000000..b71ef59c56e8 --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaDCT.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml; + +import java.util.Arrays; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.feature.DCT; +import org.apache.spark.mllib.linalg.VectorUDT; +import org.apache.spark.mllib.linalg.Vectors; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +/** + * An example demonstrating a discrete cosine transform. + * Run with + *
+ * <pre>
+ * bin/run-example ml.JavaDCT
+ * </pre>
+ */ +public class JavaDCT { + + public static void main(String[] args) { + SparkConf conf = new SparkConf().setAppName("JavaDCT"); + JavaSparkContext jsc = new JavaSparkContext(conf); + SQLContext jsql = new SQLContext(jsc); + + JavaRDD data = jsc.parallelize(Arrays.asList( + RowFactory.create(Vectors.dense(0.0, 1.0, -2.0, 3.0)), + RowFactory.create(Vectors.dense(-1.0, 2.0, 4.0, -7.0)), + RowFactory.create(Vectors.dense(14.0, -2.0, -5.0, 1.0)) + )); + StructType schema = new StructType(new StructField[]{ + new StructField("features", new VectorUDT(), false, Metadata.empty()), + }); + DataFrame df = jsql.createDataFrame(data, schema); + DCT dct = new DCT() + .setInputCol("features") + .setOutputCol("featuresDCT") + .setInverse(false); + DataFrame dctDf = dct.transform(df); + dctDf.select("featuresDCT").show(3); + } +} + diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProduct.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProduct.java new file mode 100644 index 000000000000..61569a32c442 --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaElementwiseProduct.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.feature.ElementwiseProduct; +import org.apache.spark.mllib.linalg.Vector; +import org.apache.spark.mllib.linalg.Vectors; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +/** + * An example demonstrating a Element wise Product. + * Run with + *
+ * <pre>
+ * bin/run-example ml.JavaElementwiseProduct
+ * </pre>
+ */
+public class JavaElementwiseProduct {
+
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaElementwiseProduct");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext sqlContext = new SQLContext(jsc);
+
+    // Create some vector data; also works for sparse vectors
+    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+      RowFactory.create("a", Vectors.dense(1.0, 2.0, 3.0)),
+      RowFactory.create("b", Vectors.dense(4.0, 5.0, 6.0))
+    ));
+    List<StructField> fields = new ArrayList<StructField>(2);
+    fields.add(DataTypes.createStructField("id", DataTypes.StringType, false));
+    fields.add(DataTypes.createStructField("vector", new org.apache.spark.mllib.linalg.VectorUDT(), false));
+    StructType schema = DataTypes.createStructType(fields);
+    DataFrame dataFrame = sqlContext.createDataFrame(jrdd, schema);
+    Vector transformingVector = Vectors.dense(0.0, 1.0, 2.0);
+    ElementwiseProduct transformer = new ElementwiseProduct()
+      .setScalingVec(transformingVector)
+      .setInputCol("vector")
+      .setOutputCol("transformedVector");
+    // Batch transform the vectors to create new column:
+    transformer.transform(dataFrame).show();
+
+    jsc.stop();
+  }
+}
\ No newline at end of file
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaNGram.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaNGram.java
new file mode 100644
index 000000000000..a775b9ce911b
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaNGram.java
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import java.util.Arrays;
+
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.ml.feature.NGram;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.sql.SQLContext;
+
+/**
+ * An example demonstrating an n-gram.
+ * Run with
+ * <pre>
+ * bin/run-example ml.JavaNGram
+ * </pre>
+ */
+public class JavaNGram {
+
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaNGram");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext sqlContext = new SQLContext(jsc);
+
+    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+      RowFactory.create(0.0, Arrays.asList("Hi", "I", "heard", "about", "Spark")),
+      RowFactory.create(1.0, Arrays.asList("I", "wish", "Java", "could", "use", "case", "classes")),
+      RowFactory.create(2.0, Arrays.asList("Logistic", "regression", "models", "are", "neat"))
+    ));
+    StructType schema = new StructType(new StructField[]{
+      new StructField("label", DataTypes.DoubleType, false, Metadata.empty()),
+      new StructField("words", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty())
+    });
+    DataFrame wordDataFrame = sqlContext.createDataFrame(jrdd, schema);
+    NGram ngramTransformer = new NGram().setInputCol("words").setOutputCol("ngrams");
+    DataFrame ngramDataFrame = ngramTransformer.transform(wordDataFrame);
+    for (Row r : ngramDataFrame.select("ngrams", "label").take(3)) {
+      java.util.List<String> ngrams = r.getList(0);
+      for (String ngram : ngrams) System.out.print(ngram + " --- ");
+      System.out.println();
+    }
+
+    jsc.stop();
+  }
+}
\ No newline at end of file
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoder.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoder.java
new file mode 100644
index 000000000000..966f7f32a198
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaOneHotEncoder.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import java.util.Arrays;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.ml.feature.OneHotEncoder;
+import org.apache.spark.ml.feature.StringIndexer;
+import org.apache.spark.ml.feature.StringIndexerModel;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+/**
+ * An example demonstrating a one-hot encoding.
+ * Run with
+ * <pre>
+ * bin/run-example ml.JavaOneHotEncoder
+ * </pre>
+ */
+public class JavaOneHotEncoder {
+
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaOneHotEncoder");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext sqlContext = new SQLContext(jsc);
+
+    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+      RowFactory.create(0.0, "a"),
+      RowFactory.create(1.0, "b"),
+      RowFactory.create(2.0, "c"),
+      RowFactory.create(3.0, "a"),
+      RowFactory.create(4.0, "a"),
+      RowFactory.create(5.0, "c")
+    ));
+    StructType schema = new StructType(new StructField[]{
+      new StructField("id", DataTypes.DoubleType, false, Metadata.empty()),
+      new StructField("category", DataTypes.StringType, false, Metadata.empty())
+    });
+    DataFrame df = sqlContext.createDataFrame(jrdd, schema);
+    StringIndexerModel indexer = new StringIndexer()
+      .setInputCol("category")
+      .setOutputCol("categoryIndex")
+      .fit(df);
+    DataFrame indexed = indexer.transform(df);
+
+    OneHotEncoder encoder = new OneHotEncoder()
+      .setInputCol("categoryIndex")
+      .setOutputCol("categoryVec");
+    DataFrame encoded = encoder.transform(indexed);
+    encoded.show();
+
+    jsc.stop();
+  }
+}
+
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java
new file mode 100644
index 000000000000..701b184c0c68
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPCAExample.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import java.util.Arrays;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.ml.feature.PCA;
+import org.apache.spark.ml.feature.PCAModel;
+import org.apache.spark.mllib.linalg.VectorUDT;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+/**
+ * An example demonstrating a Principal Component Analysis (PCA).
+ * Run with
+ * <pre>
+ * bin/run-example ml.JavaPCAExample
+ * </pre>
+ */ +public class JavaPCAExample { + + public static void main(String[] args) { + SparkConf conf = new SparkConf().setAppName("JavaPCAExample"); + JavaSparkContext jsc = new JavaSparkContext(conf); + SQLContext jsql = new SQLContext(jsc); + + JavaRDD data = jsc.parallelize(Arrays.asList( + RowFactory.create(Vectors.sparse(5, new int[]{1, 3}, new double[]{1.0, 7.0})), + RowFactory.create(Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0)), + RowFactory.create(Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0)) + )); + StructType schema = new StructType(new StructField[]{ + new StructField("features", new VectorUDT(), false, Metadata.empty()), + }); + DataFrame df = jsql.createDataFrame(data, schema); + PCAModel pca = new PCA() + .setInputCol("features") + .setOutputCol("pcaFeatures") + .setK(3) + .fit(df); + DataFrame result = pca.transform(df).select("pcaFeatures"); + result.show(); + } +} + diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansion.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansion.java new file mode 100644 index 000000000000..60ff0ea20dbe --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaPolynomialExpansion.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml; + +import java.util.Arrays; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.feature.PolynomialExpansion; +import org.apache.spark.mllib.linalg.VectorUDT; +import org.apache.spark.mllib.linalg.Vectors; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +/** + * An example demonstrating a polynomial expansion. + * Run with + *
+ * <pre>
+ * bin/run-example ml.JavaPolynomialExpansion
+ * </pre>
+ */ +public class JavaPolynomialExpansion { + + public static void main(String[] args) { + SparkConf conf = new SparkConf().setAppName("JavaPolynomialExpansion"); + JavaSparkContext jsc = new JavaSparkContext(conf); + SQLContext jsql = new SQLContext(jsc); + + + PolynomialExpansion polyExpansion = new PolynomialExpansion() + .setInputCol("features") + .setOutputCol("polyFeatures") + .setDegree(3); + JavaRDD data = jsc.parallelize(Arrays.asList( + RowFactory.create(Vectors.dense(-2.0, 2.3)), + RowFactory.create(Vectors.dense(0.0, 0.0)), + RowFactory.create(Vectors.dense(0.6, -1.1)) + )); + StructType schema = new StructType(new StructField[]{ + new StructField("features", new VectorUDT(), false, Metadata.empty()), + }); + DataFrame df = jsql.createDataFrame(data, schema); + DataFrame polyDF = polyExpansion.transform(df); + Row[] row = polyDF.select("polyFeatures").take(3); + for (Row r : row) { + System.out.println(r.get(0)); + } + } +} \ No newline at end of file diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaRFormula.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaRFormula.java new file mode 100644 index 000000000000..f7b90cc2f248 --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaRFormula.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml; + +import java.util.Arrays; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.feature.RFormula; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.*; + +import static org.apache.spark.sql.types.DataTypes.*; + +/** + * An example demonstrating a R-Formula. + * Run with + *
+ * <pre>
+ * bin/run-example ml.JavaRFormula
+ * </pre>
+ */ +public class JavaRFormula { + + public static void main(String[] args) { + SparkConf conf = new SparkConf().setAppName("JavaRFormula"); + JavaSparkContext jsc = new JavaSparkContext(conf); + SQLContext sqlContext = new SQLContext(jsc); + + StructType schema = createStructType(new StructField[]{ + createStructField("id", IntegerType, false), + createStructField("country", StringType, false), + createStructField("hour", IntegerType, false), + createStructField("clicked", DoubleType, false) + }); + + JavaRDD rdd = jsc.parallelize(Arrays.asList( + RowFactory.create(7, "US", 18, 1.0), + RowFactory.create(8, "CA", 12, 0.0), + RowFactory.create(9, "NZ", 15, 0.0) + )); + + DataFrame dataset = sqlContext.createDataFrame(rdd, schema); + RFormula formula = new RFormula() + .setFormula("clicked ~ country + hour") + .setFeaturesCol("features") + .setLabelCol("label"); + DataFrame output = formula.fit(dataset).transform(dataset); + output.select("features", "label").show(); + } +} + diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaStopWordsRemover.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaStopWordsRemover.java new file mode 100644 index 000000000000..d31b076edc9a --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaStopWordsRemover.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml; + + +import java.util.Arrays; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.feature.StopWordsRemover; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.DataTypes; +import org.apache.spark.sql.types.Metadata; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +/** + * An example demonstrating a stop words remover. + * Run with + *
+ * <pre>
+ * bin/run-example ml.JavaStopWordsRemover
+ * </pre>
+ */ +public class JavaStopWordsRemover { + + public static void main(String[] args) { + SparkConf conf = new SparkConf().setAppName("JavaStopWordsRemover"); + JavaSparkContext jsc = new JavaSparkContext(conf); + SQLContext jsql = new SQLContext(jsc); + + StopWordsRemover remover = new StopWordsRemover() + .setInputCol("raw") + .setOutputCol("filtered"); + + JavaRDD rdd = jsc.parallelize(Arrays.asList( + RowFactory.create(Arrays.asList("I", "saw", "the", "red", "baloon")), + RowFactory.create(Arrays.asList("Mary", "had", "a", "little", "lamb")) + )); + StructType schema = new StructType(new StructField[]{ + new StructField("raw", DataTypes.createArrayType(DataTypes.StringType), false, Metadata.empty()) + }); + DataFrame dataset = jsql.createDataFrame(rdd, schema); + remover.transform(dataset).show(); + } +} diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaStringIndexer.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaStringIndexer.java new file mode 100644 index 000000000000..81716d7b1d13 --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaStringIndexer.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml; + +import java.util.Arrays; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.feature.StringIndexer; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.StructField; +import org.apache.spark.sql.types.StructType; + +import static org.apache.spark.sql.types.DataTypes.*; + +/** + * An example demonstrating a string indexer. + * Run with + *
+ * <pre>
+ * bin/run-example ml.JavaStringIndexer
+ * </pre>
+ */
+public class JavaStringIndexer {
+
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaStringIndexer");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext sqlContext = new SQLContext(jsc);
+
+    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+      RowFactory.create(0, "a"),
+      RowFactory.create(1, "b"),
+      RowFactory.create(2, "c"),
+      RowFactory.create(3, "a"),
+      RowFactory.create(4, "a"),
+      RowFactory.create(5, "c")
+    ));
+    StructType schema = new StructType(new StructField[]{
+      createStructField("id", IntegerType, false),
+      createStructField("category", StringType, false)
+    });
+    DataFrame df = sqlContext.createDataFrame(jrdd, schema);
+    StringIndexer indexer = new StringIndexer()
+      .setInputCol("category")
+      .setOutputCol("categoryIndex");
+    DataFrame indexed = indexer.fit(df).transform(df);
+    indexed.show();
+
+    jsc.stop();
+  }
+}
\ No newline at end of file
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizer.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizer.java
new file mode 100644
index 000000000000..ce0829e76e35
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaTokenizer.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import java.util.Arrays;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.ml.feature.RegexTokenizer;
+import org.apache.spark.ml.feature.Tokenizer;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.types.DataTypes;
+import org.apache.spark.sql.types.Metadata;
+import org.apache.spark.sql.types.StructField;
+import org.apache.spark.sql.types.StructType;
+
+/**
+ * An example demonstrating a tokenizer.
+ * Run with
+ * <pre>
+ * bin/run-example ml.JavaTokenizer
+ * </pre>
+ */
+public class JavaTokenizer {
+
+  public static void main(String[] args) {
+    SparkConf conf = new SparkConf().setAppName("JavaTokenizer");
+    JavaSparkContext jsc = new JavaSparkContext(conf);
+    SQLContext sqlContext = new SQLContext(jsc);
+
+    JavaRDD<Row> jrdd = jsc.parallelize(Arrays.asList(
+      RowFactory.create(0, "Hi I heard about Spark"),
+      RowFactory.create(1, "I wish Java could use case classes"),
+      RowFactory.create(2, "Logistic,regression,models,are,neat")
+    ));
+    StructType schema = new StructType(new StructField[]{
+      new StructField("label", DataTypes.IntegerType, false, Metadata.empty()),
+      new StructField("sentence", DataTypes.StringType, false, Metadata.empty())
+    });
+    DataFrame sentenceDataFrame = sqlContext.createDataFrame(jrdd, schema);
+    Tokenizer tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words");
+    DataFrame wordsDataFrame = tokenizer.transform(sentenceDataFrame);
+    for (Row r : wordsDataFrame.select("words", "label").take(3)) {
+      java.util.List<String> words = r.getList(0);
+      for (String word : words) System.out.print(word + " ");
+      System.out.println();
+    }
+
+    RegexTokenizer regexTokenizer = new RegexTokenizer()
+      .setInputCol("sentence")
+      .setOutputCol("words")
+      .setPattern("\\W"); // alternatively .setPattern("\\w+").setGaps(false);
+
+    jsc.stop();
+  }
+}
diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssembler.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssembler.java
new file mode 100644
index 000000000000..14f74276a012
--- /dev/null
+++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorAssembler.java
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml;
+
+import java.util.Arrays;
+
+import org.apache.spark.SparkConf;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.ml.feature.VectorAssembler;
+import org.apache.spark.mllib.linalg.VectorUDT;
+import org.apache.spark.mllib.linalg.Vectors;
+import org.apache.spark.sql.DataFrame;
+import org.apache.spark.sql.Row;
+import org.apache.spark.sql.RowFactory;
+import org.apache.spark.sql.SQLContext;
+import org.apache.spark.sql.types.*;
+
+import static org.apache.spark.sql.types.DataTypes.*;
+
+/**
+ * An example demonstrating a vector assembler.
+ * Run with
+ * <pre>
+ * bin/run-example ml.JavaVectorAssembler
+ * </pre>
+ */ +public class JavaVectorAssembler { + + public static void main(String[] args) { + SparkConf conf = new SparkConf().setAppName("JavaVectorAssembler"); + JavaSparkContext jsc = new JavaSparkContext(conf); + SQLContext sqlContext = new SQLContext(jsc); + + StructType schema = createStructType(new StructField[]{ + createStructField("id", IntegerType, false), + createStructField("hour", IntegerType, false), + createStructField("mobile", DoubleType, false), + createStructField("userFeatures", new VectorUDT(), false), + createStructField("clicked", DoubleType, false) + }); + Row row = RowFactory.create(0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0); + JavaRDD rdd = jsc.parallelize(Arrays.asList(row)); + DataFrame dataset = sqlContext.createDataFrame(rdd, schema); + + VectorAssembler assembler = new VectorAssembler() + .setInputCols(new String[]{"hour", "mobile", "userFeatures"}) + .setOutputCol("features"); + + DataFrame output = assembler.transform(dataset); + System.out.println(output.select("features", "clicked").first()); + } +} + diff --git a/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicer.java b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicer.java new file mode 100644 index 000000000000..24d9296d8460 --- /dev/null +++ b/examples/src/main/java/org/apache/spark/examples/ml/JavaVectorSlicer.java @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml; + +import com.google.common.collect.Lists; + +import org.apache.spark.SparkConf; +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.ml.attribute.Attribute; +import org.apache.spark.ml.attribute.AttributeGroup; +import org.apache.spark.ml.attribute.NumericAttribute; +import org.apache.spark.ml.feature.VectorSlicer; +import org.apache.spark.mllib.linalg.Vectors; +import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; +import org.apache.spark.sql.RowFactory; +import org.apache.spark.sql.SQLContext; +import org.apache.spark.sql.types.*; + +/** + * An example demonstrating a vector slicer. + * Run with + *
+ * <pre>
+ * bin/run-example ml.JavaVectorSlicer
+ * </pre>
+ */ +public class JavaVectorSlicer { + + public static void main(String[] args) { + SparkConf conf = new SparkConf().setAppName("JavaVectorAssembler"); + JavaSparkContext jsc = new JavaSparkContext(conf); + SQLContext jsql = new SQLContext(jsc); + + Attribute[] attrs = new Attribute[]{ + NumericAttribute.defaultAttr().withName("f1"), + NumericAttribute.defaultAttr().withName("f2"), + NumericAttribute.defaultAttr().withName("f3") + }; + AttributeGroup group = new AttributeGroup("userFeatures", attrs); + + JavaRDD jrdd = jsc.parallelize(Lists.newArrayList( + RowFactory.create(Vectors.sparse(3, new int[]{0, 1}, new double[]{-2.0, 2.3})), + RowFactory.create(Vectors.dense(-2.0, 2.3, 0.0)) + )); + + DataFrame dataset = jsql.createDataFrame(jrdd, (new StructType()).add(group.toStructField())); + + VectorSlicer vectorSlicer = new VectorSlicer() + .setInputCol("userFeatures").setOutputCol("features"); + + vectorSlicer.setIndices(new int[]{1}).setNames(new String[]{"f3"}); + // or slicer.setIndices(new int[]{1, 2}), or slicer.setNames(new String[]{"f2", "f3"}) + + DataFrame output = vectorSlicer.transform(dataset); + + System.out.println(output.select("userFeatures", "features").first()); + } +} + diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/BinarizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/BinarizerExample.scala new file mode 100644 index 000000000000..4dacba9c6b59 --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/BinarizerExample.scala @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml + +import org.apache.spark.{SparkContext, SparkConf} +import org.apache.spark.ml.feature.Binarizer +import org.apache.spark.sql.{SQLContext, DataFrame} + +/** + * An example runner for binarizer. 
Run with + * {{{ + * ./bin/run-example ml.BinarizerExample [options] + * }}} + */ +object BinarizerExample { + + val conf = new SparkConf().setAppName("BinarizerExample") + val sc = new SparkContext(conf) + val sqlContext = new SQLContext(sc) + val data = Array( + (0, 0.1), + (1, 0.8), + (2, 0.2) + ) + val dataFrame: DataFrame = sqlContext.createDataFrame(data).toDF("label", "feature") + + val binarizer: Binarizer = new Binarizer() + .setInputCol("feature") + .setOutputCol("binarized_feature") + .setThreshold(0.5) + + val binarizedDataFrame = binarizer.transform(dataFrame) + val binarizedFeatures = binarizedDataFrame.select("binarized_feature") + binarizedFeatures.collect().foreach(println) +} diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/BucketizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/BucketizerExample.scala new file mode 100644 index 000000000000..dc592c875aad --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/BucketizerExample.scala @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml + +import org.apache.spark.{SparkContext, SparkConf} +import org.apache.spark.ml.feature.Bucketizer +import org.apache.spark.sql.{SQLContext, DataFrame} + +/** + * An example runner for bucketizer. Run with + * {{{ + * ./bin/run-example ml.BucketizerExample [options] + * }}} + */ +object BucketizerExample { + + val conf = new SparkConf().setAppName("BucketizerExample") + val sc = new SparkContext(conf) + val sqlContext = new SQLContext(sc) + + val splits = Array(Double.NegativeInfinity, -0.5, 0.0, 0.5, Double.PositiveInfinity) + + val data = Array(-0.5, -0.3, 0.0, 0.2) + val dataFrame = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features") + + val bucketizer = new Bucketizer() + .setInputCol("features") + .setOutputCol("bucketedFeatures") + .setSplits(splits) + + // Transform original data into its bucket index. + val bucketedData = bucketizer.transform(dataFrame) +} + diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/DCTExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/DCTExample.scala new file mode 100644 index 000000000000..1472cce070af --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/DCTExample.scala @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml + +import org.apache.spark.sql.SQLContext +import org.apache.spark.{SparkContext, SparkConf} +import org.apache.spark.ml.feature.DCT +import org.apache.spark.mllib.linalg.Vectors + +/** + * An example runner for discrete cosine transform. Run with + * {{{ + * ./bin/run-example ml.DCTExample [options] + * }}} + */ +object DCTExample { + + val conf = new SparkConf().setAppName("DCTExample") + val sc = new SparkContext(conf) + val sqlContext = new SQLContext(sc) + + val data = Seq( + Vectors.dense(0.0, 1.0, -2.0, 3.0), + Vectors.dense(-1.0, 2.0, 4.0, -7.0), + Vectors.dense(14.0, -2.0, -5.0, 1.0)) + val df = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features") + val dct = new DCT() + .setInputCol("features") + .setOutputCol("featuresDCT") + .setInverse(false) + val dctDf = dct.transform(df) + dctDf.select("featuresDCT").show(3) +} + diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/ElementWiseProductExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/ElementWiseProductExample.scala new file mode 100644 index 000000000000..ad5217278e39 --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ml/ElementWiseProductExample.scala @@ -0,0 +1,51 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.ml + +import org.apache.spark.sql.SQLContext +import org.apache.spark.{SparkContext, SparkConf} +import org.apache.spark.ml.feature.ElementwiseProduct +import org.apache.spark.mllib.linalg.Vectors + +/** + * An example runner for element wise product. 
+
+    // Batch transform the vectors to create new column:
+    transformer.transform(dataFrame).show()
+
+    sc.stop()
+  }
+}
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/MinMaxScalerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/MinMaxScalerExample.scala
new file mode 100644
index 000000000000..6334caa7c4df
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/MinMaxScalerExample.scala
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.ml.feature.MinMaxScaler
+import org.apache.spark.sql.SQLContext
+
+/**
+ * An example runner for min-max scaler. Run with
+ * {{{
+ * ./bin/run-example ml.MinMaxScalerExample [options]
+ * }}}
+ */
+object MinMaxScalerExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("MinMaxScalerExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    val dataFrame = sqlContext.read.format("libsvm")
+      .load("data/mllib/sample_libsvm_data.txt")
+
+    val scaler = new MinMaxScaler()
+      .setInputCol("features")
+      .setOutputCol("scaledFeatures")
+
+    // Compute summary statistics and generate MinMaxScalerModel
+    val scalerModel = scaler.fit(dataFrame)
+
+    // Rescale each feature to the range [min, max].
+    val scaledData = scalerModel.transform(dataFrame)
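+    // By default min = 0.0 and max = 1.0, so each feature ends up in [0.0, 1.0]
+    // (shown here as a sketch of the default behaviour).
+    scaledData.show()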
+
+    sc.stop()
+  }
+}
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/NGramExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/NGramExample.scala
new file mode 100644
index 000000000000..6e56cde93803
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/NGramExample.scala
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.ml.feature.NGram
+import org.apache.spark.sql.SQLContext
+
+/**
+ * An example runner for n-gram. Run with
+ * {{{
+ * ./bin/run-example ml.NGramExample [options]
+ * }}}
+ */
+object NGramExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("NGramExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    val wordDataFrame = sqlContext.createDataFrame(Seq(
+      (0, Array("Hi", "I", "heard", "about", "Spark")),
+      (1, Array("I", "wish", "Java", "could", "use", "case", "classes")),
+      (2, Array("Logistic", "regression", "models", "are", "neat"))
+    )).toDF("label", "words")
+
+    val ngram = new NGram().setInputCol("words").setOutputCol("ngrams")
+    val ngramDataFrame = ngram.transform(wordDataFrame)
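+    // Illustrative expectation (sketch): with the default n = 2, the first row yields
+    // the bigrams "Hi I", "I heard", "heard about", "about Spark".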
+    // Rows of "ngrams" come back as sequences of strings.
+    ngramDataFrame.take(3).map(_.getAs[Seq[String]]("ngrams").toList).foreach(println)
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala
new file mode 100644
index 000000000000..f07c9c83c66d
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/NormalizerExample.scala
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.ml.feature.Normalizer
+import org.apache.spark.sql.SQLContext
+
+/**
+ * An example runner for normalizer. Run with
+ * {{{
+ * ./bin/run-example ml.NormalizerExample [options]
+ * }}}
+ */
+object NormalizerExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("NormalizerExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    val dataFrame = sqlContext.read.format("libsvm")
+      .load("data/mllib/sample_libsvm_data.txt")
+
+    // Normalize each Vector using $L^1$ norm.
+    val normalizer = new Normalizer()
+      .setInputCol("features")
+      .setOutputCol("normFeatures")
+      .setP(1.0)
+    val l1NormData = normalizer.transform(dataFrame)
+
+    // Normalize each Vector using $L^\infty$ norm, overriding p for this call.
+    val lInfNormData = normalizer.transform(dataFrame, normalizer.p -> Double.PositiveInfinity)
+
+    sc.stop()
+  }
+}
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala
new file mode 100644
index 000000000000..446fb03a222b
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/OneHotEncoderExample.scala
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.ml.feature.{OneHotEncoder, StringIndexer}
+import org.apache.spark.sql.SQLContext
+
+/**
+ * An example runner for one-hot encoder. Run with
+ * {{{
+ * ./bin/run-example ml.OneHotEncoderExample [options]
+ * }}}
+ */
+object OneHotEncoderExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("OneHotEncoderExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    val df = sqlContext.createDataFrame(Seq(
+      (0, "a"),
+      (1, "b"),
+      (2, "c"),
+      (3, "a"),
+      (4, "a"),
+      (5, "c")
+    )).toDF("id", "category")
+
+    val indexer = new StringIndexer()
+      .setInputCol("category")
+      .setOutputCol("categoryIndex")
+      .fit(df)
+    val indexed = indexer.transform(df)
+
+    val encoder = new OneHotEncoder()
+      .setInputCol("categoryIndex")
+      .setOutputCol("categoryVec")
+    val encoded = encoder.transform(indexed)
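+    // Illustrative expectation (sketch): StringIndexer assigns indices by descending
+    // frequency (a -> 0.0, c -> 1.0, b -> 2.0), and OneHotEncoder drops the last
+    // category by default, so "categoryVec" is a 2-dimensional sparse vector.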
+    encoded.select("id", "categoryVec").show()
+
+    sc.stop()
+  }
+}
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala
new file mode 100644
index 000000000000..c2e9a2f2057b
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/PCAExample.scala
@@ -0,0 +1,54 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.ml.feature.PCA
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.sql.SQLContext
+
+/**
+ * An example runner for Principal Component Analysis (PCA). Run with
+ * {{{
+ * ./bin/run-example ml.PCAExample [options]
+ * }}}
+ */
+object PCAExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("PCAExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    val data = Array(
+      Vectors.sparse(5, Seq((1, 1.0), (3, 7.0))),
+      Vectors.dense(2.0, 0.0, 3.0, 4.0, 5.0),
+      Vectors.dense(4.0, 0.0, 0.0, 6.0, 7.0)
+    )
+    val df = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features")
+
+    val pca = new PCA()
+      .setInputCol("features")
+      .setOutputCol("pcaFeatures")
+      .setK(3)
+      .fit(df)
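+    // Here k = 3 projects the 5-dimensional input vectors onto their top 3
+    // principal components (the fitted component values depend on the data).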
+    val pcaDF = pca.transform(df)
+    val result = pcaDF.select("pcaFeatures")
+    result.show()
+
+    sc.stop()
+  }
+}
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala
new file mode 100644
index 000000000000..4fa16b6ef491
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/PolynomialExpansionExample.scala
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.ml.feature.PolynomialExpansion
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.sql.SQLContext
+
+/**
+ * An example runner for polynomial expansion. Run with
+ * {{{
+ * ./bin/run-example ml.PolynomialExpansionExample [options]
+ * }}}
+ */
+object PolynomialExpansionExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("PolynomialExpansionExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    val data = Array(
+      Vectors.dense(-2.0, 2.3),
+      Vectors.dense(0.0, 0.0),
+      Vectors.dense(0.6, -1.1)
+    )
+    val df = sqlContext.createDataFrame(data.map(Tuple1.apply)).toDF("features")
+
+    val polynomialExpansion = new PolynomialExpansion()
+      .setInputCol("features")
+      .setOutputCol("polyFeatures")
+      .setDegree(3)
+    val polyDF = polynomialExpansion.transform(df)
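+    // Illustrative expectation (sketch): a degree-3 expansion of a 2-dimensional
+    // vector produces 9 output features (all monomials of degree 1 through 3).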
+    polyDF.select("polyFeatures").take(3).foreach(println)
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala
new file mode 100644
index 000000000000..e50e2f07403e
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/RFormulaExample.scala
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.ml.feature.RFormula
+import org.apache.spark.sql.SQLContext
+
+/**
+ * An example runner for R-formula. Run with
+ * {{{
+ * ./bin/run-example ml.RFormulaExample [options]
+ * }}}
+ */
+object RFormulaExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("RFormulaExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    val dataset = sqlContext.createDataFrame(Seq(
+      (7, "US", 18, 1.0),
+      (8, "CA", 12, 0.0),
+      (9, "NZ", 15, 0.0)
+    )).toDF("id", "country", "hour", "clicked")
+
+    val formula = new RFormula()
+      .setFormula("clicked ~ country + hour")
+      .setFeaturesCol("features")
+      .setLabelCol("label")
+    val output = formula.fit(dataset).transform(dataset)
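+    // A sketch of what to expect: "clicked ~ country + hour" one-hot encodes the
+    // categorical column "country", assembles it with "hour" into "features", and
+    // copies "clicked" into "label".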
+    output.select("features", "label").show()
+
+    sc.stop()
+  }
+}
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/StandardScalerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/StandardScalerExample.scala
new file mode 100644
index 000000000000..e2150001682d
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/StandardScalerExample.scala
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.ml.feature.StandardScaler
+import org.apache.spark.sql.SQLContext
+
+/**
+ * An example runner for standard scaler. Run with
+ * {{{
+ * ./bin/run-example ml.StandardScalerExample [options]
+ * }}}
+ */
+object StandardScalerExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("StandardScalerExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    val dataFrame = sqlContext.read.format("libsvm")
+      .load("data/mllib/sample_libsvm_data.txt")
+
+    val scaler = new StandardScaler()
+      .setInputCol("features")
+      .setOutputCol("scaledFeatures")
+      .setWithStd(true)
+      .setWithMean(false)
+
+    // Compute summary statistics by fitting the StandardScaler.
+    val scalerModel = scaler.fit(dataFrame)
+
+    // Normalize each feature to have unit standard deviation.
+    val scaledData = scalerModel.transform(dataFrame)
+
+    sc.stop()
+  }
+}
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala
new file mode 100644
index 000000000000..2109a5ebc146
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/StopWordsRemoverExample.scala
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.ml.feature.StopWordsRemover
+import org.apache.spark.sql.SQLContext
+
+/**
+ * An example runner for stop words remover. Run with
+ * {{{
+ * ./bin/run-example ml.StopWordsRemoverExample [options]
+ * }}}
+ */
+object StopWordsRemoverExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("StopWordsRemoverExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    val remover = new StopWordsRemover()
+      .setInputCol("raw")
+      .setOutputCol("filtered")
+
+    val dataSet = sqlContext.createDataFrame(Seq(
+      (0, Seq("I", "saw", "the", "red", "balloon")),
+      (1, Seq("Mary", "had", "a", "little", "lamb"))
+    )).toDF("id", "raw")
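+    // Illustrative expectation (sketch): the default English stop words include
+    // "I", "the", "had" and "a", so the filtered rows become
+    // [saw, red, balloon] and [Mary, little, lamb].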
+    remover.transform(dataSet).show()
+
+    sc.stop()
+  }
+}
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/StringIndexerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/StringIndexerExample.scala
new file mode 100644
index 000000000000..e858f64d52fd
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/StringIndexerExample.scala
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.examples.ml
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.ml.feature.StringIndexer
+import org.apache.spark.sql.SQLContext
+
+/**
+ * An example runner for string indexer. Run with
+ * {{{
+ * ./bin/run-example ml.StringIndexerExample [options]
+ * }}}
+ */
+object StringIndexerExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("StringIndexerExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    val df = sqlContext.createDataFrame(
+      Seq((0, "a"), (1, "b"), (2, "c"), (3, "a"), (4, "a"), (5, "c"))
+    ).toDF("id", "category")
+
+    val indexer = new StringIndexer()
+      .setInputCol("category")
+      .setOutputCol("categoryIndex")
+    val indexed = indexer.fit(df).transform(df)
+    indexed.show()
+
+    sc.stop()
+  }
+}
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala
new file mode 100644
index 000000000000..b3b28791fac5
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/TokenizerExample.scala
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.ml.feature.{RegexTokenizer, Tokenizer}
+import org.apache.spark.sql.SQLContext
+
+/**
+ * An example runner for tokenizer. Run with
+ * {{{
+ * ./bin/run-example ml.TokenizerExample [options]
+ * }}}
+ */
+object TokenizerExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("TokenizerExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    val sentenceDataFrame = sqlContext.createDataFrame(Seq(
+      (0, "Hi I heard about Spark"),
+      (1, "I wish Java could use case classes"),
+      (2, "Logistic,regression,models,are,neat")
+    )).toDF("label", "sentence")
+
+    // Tokenizer splits on whitespace, so the comma-separated sentence stays one token;
+    // RegexTokenizer splits on non-word characters instead.
+    val tokenizer = new Tokenizer().setInputCol("sentence").setOutputCol("words")
+    val regexTokenizer = new RegexTokenizer()
+      .setInputCol("sentence")
+      .setOutputCol("words")
+      .setPattern("\\W") // alternatively .setPattern("\\w+").setGaps(false)
+
+    val tokenized = tokenizer.transform(sentenceDataFrame)
+    tokenized.select("words", "label").take(3).foreach(println)
+    val regexTokenized = regexTokenizer.transform(sentenceDataFrame)
+    regexTokenized.select("words", "label").take(3).foreach(println)
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala
new file mode 100644
index 000000000000..bae230a2aedb
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/VectorAssemblerExample.scala
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.ml.feature.VectorAssembler
+import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.sql.SQLContext
+
+/**
+ * An example runner for vector assembler. Run with
+ * {{{
+ * ./bin/run-example ml.VectorAssemblerExample [options]
+ * }}}
+ */
+object VectorAssemblerExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("VectorAssemblerExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    val dataset = sqlContext.createDataFrame(
+      Seq((0, 18, 1.0, Vectors.dense(0.0, 10.0, 0.5), 1.0))
+    ).toDF("id", "hour", "mobile", "userFeatures", "clicked")
+
+    val assembler = new VectorAssembler()
+      .setInputCols(Array("hour", "mobile", "userFeatures"))
+      .setOutputCol("features")
+    val output = assembler.transform(dataset)
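+    // Illustrative expectation (sketch): "hour", "mobile" and the 3-dimensional
+    // "userFeatures" are concatenated into a single vector [18.0, 1.0, 0.0, 10.0, 0.5].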
+    println(output.select("features", "clicked").first())
+
+    sc.stop()
+  }
+}
+// scalastyle:on println
diff --git a/examples/src/main/scala/org/apache/spark/examples/ml/VectorIndexerExample.scala b/examples/src/main/scala/org/apache/spark/examples/ml/VectorIndexerExample.scala
new file mode 100644
index 000000000000..480bac0d4092
--- /dev/null
+++ b/examples/src/main/scala/org/apache/spark/examples/ml/VectorIndexerExample.scala
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// scalastyle:off println
+package org.apache.spark.examples.ml
+
+import org.apache.spark.{SparkConf, SparkContext}
+import org.apache.spark.ml.feature.VectorIndexer
+import org.apache.spark.sql.SQLContext
+
+/**
+ * An example runner for vector indexer. Run with
+ * {{{
+ * ./bin/run-example ml.VectorIndexerExample [options]
+ * }}}
+ */
+object VectorIndexerExample {
+
+  def main(args: Array[String]): Unit = {
+    val conf = new SparkConf().setAppName("VectorIndexerExample")
+    val sc = new SparkContext(conf)
+    val sqlContext = new SQLContext(sc)
+
+    val data = sqlContext.read.format("libsvm")
+      .load("data/mllib/sample_libsvm_data.txt")
+
+    // Features with at most 10 distinct values are treated as categorical;
+    // the rest are left as continuous.
+    val indexer = new VectorIndexer()
+      .setInputCol("features")
+      .setOutputCol("indexed")
+      .setMaxCategories(10)
+    val indexerModel = indexer.fit(data)
+
+    val categoricalFeatures: Set[Int] = indexerModel.categoryMaps.keys.toSet
+    println(s"Chose ${categoricalFeatures.size} categorical features: " +
+      categoricalFeatures.mkString(", "))
+
+    // Create new column "indexed" with categorical values transformed to indices
+    val indexedData = indexerModel.transform(data)
+
+    sc.stop()
+  }
+}
+// scalastyle:on println