From bf0bb39b79e9075b10f2764ba5a3e9ff691725a3 Mon Sep 17 00:00:00 2001 From: Nicolas R Date: Tue, 4 Oct 2016 17:26:49 +0200 Subject: [PATCH 1/2] Added the PrefixSpan MLlib example based on the current documentation --- .../main/python/mllib/prefix_span_example.py | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 examples/src/main/python/mllib/prefix_span_example.py diff --git a/examples/src/main/python/mllib/prefix_span_example.py b/examples/src/main/python/mllib/prefix_span_example.py new file mode 100644 index 000000000000..3ecad105488a --- /dev/null +++ b/examples/src/main/python/mllib/prefix_span_example.py @@ -0,0 +1,38 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#
+
+# $example on$
+from pyspark.mllib.fpm import PrefixSpan
+# $example off$
+from pyspark import SparkContext
+
+if __name__ == "__main__":
+    sc = SparkContext(appName="PythonPrefixSpanExample")
+
+    # $example on$
+    sequences = sc.parallelize([  # each element is one sequence of itemsets
+        [[1, 2], [3]],
+        [[1], [3, 2], [1, 2]],
+        [[1, 2], [5]],
+        [[6]],
+    ])
+
+    model = PrefixSpan.train(sequences, minSupport=0.5, maxPatternLength=5)
+    result = model.freqSequences().collect()
+    for fs in result:  # print each frequent sequence with its frequency
+        print('{}, {}'.format(fs.sequence, fs.freq))
+    # $example off$
From bead90c9d1033877f112716d2f67ed1809f8ca23 Mon Sep 17 00:00:00 2001 From: Nicolas R Date: Fri, 4 Nov 2016 10:20:38 +0100 Subject: [PATCH 2/2] updated the doc for the Python version of PrefixSpan --- docs/mllib-frequent-pattern-mining.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docs/mllib-frequent-pattern-mining.md b/docs/mllib-frequent-pattern-mining.md index 93e3f0b2d226..16d03c9d3059 100644 --- a/docs/mllib-frequent-pattern-mining.md +++ b/docs/mllib-frequent-pattern-mining.md @@ -177,6 +177,20 @@ Refer to the [`PrefixSpan` Java docs](api/java/org/apache/spark/mllib/fpm/Prefix {% include_example java/org/apache/spark/examples/mllib/JavaPrefixSpanExample.java %} + + +
+ +[`PrefixSpan`](api/python/pyspark.mllib.html#pyspark.mllib.fpm.PrefixSpan) implements the +PrefixSpan algorithm. +Calling `PrefixSpan.train` returns a +[`PrefixSpanModel`](api/python/pyspark.mllib.html#pyspark.mllib.fpm.PrefixSpanModel) +that stores the frequent sequences with their frequencies. + +Refer to the [`PrefixSpan` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.fpm.PrefixSpan) and [`PrefixSpanModel` Python docs](api/python/pyspark.mllib.html#pyspark.mllib.fpm.PrefixSpanModel) for details on the API. + +{% include_example python/mllib/prefix_span_example.py %} +