From 5bba9dd2ee1370dda9c51d852db02a8fb3577501 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 3 Jul 2014 11:33:22 +0800 Subject: [PATCH 001/143] Create ParallelANN.scala This is the main ParallelANN class and associated Model --- .../apache/spark/mllib/ann/ParallelANN.scala | 391 ++++++++++++++++++ 1 file changed, 391 insertions(+) create mode 100644 mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala new file mode 100644 index 0000000000000..f0668d77895d2 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala @@ -0,0 +1,391 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.ann + +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.linalg.Vector +import org.apache.spark.mllib.optimization._ +import org.apache.spark.mllib.linalg.Vectors +import breeze.linalg.DenseVector +import org.apache.spark.mllib.linalg.Vectors +import org.apache.spark.rdd.RDD +import breeze.linalg.{axpy => brzAxpy, Vector => BV} +import breeze.linalg.{Vector => BV} +import breeze.linalg.{axpy => brzAxpy} +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.mllib.regression.RegressionModel + +/* + * Implements a Artificial Neural Network (ANN) + * + * format of data: + * data[ 0..noInput-1 ]: Input + * data[ noInput..noInput+noOutput-1 ]: Output + * + */ + +trait ANN { + + def noInput: Integer + def noHidden: Integer + def noOutput: Integer + def beta: Double + + def g( x: Double ) = (1/(1+math.exp(-beta*x))) + def dg( x: Double ) = beta*g(x)*(1-g(x)) + + + /* returns the hidden layer including the -1 robonode! 
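+   * (the "robonode" is the constant -1 bias node appended to the layer)
+   *
+   * As a quick sanity check of g and dg above, with beta = 1.0:
+   * g(0) = 1/(1+e^0) = 0.5 and dg(0) = 1.0*0.5*(1-0.5) = 0.25.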
*/ + def computeHidden( data: Vector, weights: Vector ): Vector = { + + val brzData = data.toBreeze + val brzInp = DenseVector.vertcat( brzData( 0 to noInput-1 ).toDenseVector, DenseVector[Double](-1.0) ) + val brzWeights = weights.toBreeze + var hidden = DenseVector.zeros[Double]( noHidden+1 ) + + for( j <- 0 to noHidden-1 ) { + + val weightsSubset = brzWeights( j*(noInput+1) to j*(noInput+1)+(noInput+1)-1 ).toVector + hidden( j ) = g( weightsSubset.dot( brzInp ) ) + + } + + hidden( noHidden ) = -1.0 + + Vectors.fromBreeze( hidden ) + + } + + /* returns the hidden layer including the -1 robonode, as well as the final estimation */ + def computeValues( data: Vector, weights: Vector ): (Vector, Vector) = { + + var hidden = computeHidden( data, weights ) + var output = new Array[Double](noOutput) + + for( k<-0 to noOutput-1 ) { + val brzWeights = weights.toBreeze + var weightsSubset = brzWeights( noHidden*(noInput+1)+k*(noHidden+1) to noHidden*(noInput+1)+(k+1)*(noHidden+1)-1).toVector + output(k) = g( weightsSubset.dot( hidden.toBreeze ) ) + } + + ( hidden, Vectors.dense( output ) ) + + } + +} + +class ParallelANNModel private[mllib] +( + override val weights: Vector, + val noInp: Integer, + val noHid: Integer, + val noOut: Integer, + val b: Double ) + extends GeneralizedSteepestDescendModel(weights) with RegressionModel with Serializable with ANN { + + val noInput = noInp + val noHidden = noHid + val noOutput = noOut + val beta = b + + override def predictPoint( data: Vector, weights: Vector ): Double = { + val outp = computeValues( data, weights )._2 + outp.toArray(0) + } + + def predictPointV( data: Vector, weights: Vector): Vector = { + computeValues( data, weights )._2 + } + +} + +/** + * Train a linear regression model with no regularization using Stochastic Gradient Descent. + * This solves the least squares regression formulation + * f(weights) = 1/n ||A weights-y||^2 + * (which is the mean squared error). + * Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with + * its corresponding right hand side label y. + * See also the documentation for the precise formulation. + */ +class ParallelANNWithSGD private ( + private var stepSize: Double, + private var numIterations: Int, + private var miniBatchFraction: Double, + private var noInput: Int, + private var noHidden: Int, + private var noOutput: Int, + private val beta: Double ) + extends GeneralizedSteepestDescendAlgorithm[ParallelANNModel] with Serializable { + + private val gradient = new LeastSquaresGradientANN( noInput, noHidden, noOutput, beta ) + private val updater = new ANNUpdater() + override val optimizer = new GradientDescent(gradient, updater) + .setStepSize(stepSize) + .setNumIterations(numIterations) + .setMiniBatchFraction(miniBatchFraction) + + /** + * Construct a LinearRegression object with default parameters: {stepSize: 1.0, + * numIterations: 100, miniBatchFraction: 1.0}. 
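+   *
+   * Minimal usage sketch (`trainingData` here is an assumed
+   * RDD[(Vector,Vector)] of input/target pairs, targets in [0,1]):
+   * {{{
+   * val ann = new ParallelANNWithSGD( 2, 20 )  // noInput = 2, noHidden = 20
+   * ann.optimizer.setNumIterations( 500 ).setStepSize( 1.0 )
+   * val model = ann.train( trainingData )
+   * val estimate: Vector = model.predictV( Vectors.dense( 0.5, -0.5 ) )
+   * }}}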
+ */ + def this() = { + this( 1.0, 100, 1.0, 1, 5, 1, 1.0 ) + } + + def this( noHidden: Int ) = { + this( 1.0, 100, 1.0, 1, noHidden, 1, 1.0 ) + } + + def this( noInput: Int, noHidden: Int ) = { + this( 1.0, 100, 1.0, noInput, noHidden, 1, 1.0 ) + } + + def this( noInput: Int, noHidden: Int, noOutput: Int ) = { + this( 1.0, 100, 1.0, noInput, noHidden, noOutput, 1.0 ) + } + + override protected def createModel(weights: Vector) = { + new ParallelANNModel( weights, noInput, noHidden, noOutput, beta ) + } + + def checkOutput( rdd: RDD[(Vector,Vector)] ) { + val oVals = rdd.flatMap( T => T._2.toArray ) + var omax = oVals.max + assert( omax <= 1 ) + var omin = oVals.min + assert( omin >= 0 ) + } + + def randomDouble( i: Int ): Double = { + (((i+5)*59049+(i+5)*78125)%65536).toDouble/65536 + } + + def train( rdd: RDD[(Vector,Vector)] ): ParallelANNModel = { + + val ft = rdd.first() + + assert( noInput == ft._1.size ) + assert( noOutput == ft._2.size ) + + checkOutput( rdd ) + + val noWeights = (noInput+1)*noHidden + (noHidden+1)*noOutput + + val initialWeightsArr = new Array[Double](noWeights) + + for( i <- 0 to (noInput+1)*noHidden-1 ) + initialWeightsArr( i ) = (randomDouble(i)*4.8-2.4)/(noInput+1) + for( i <- 0 to (noHidden+1)*noOutput-1 ) + initialWeightsArr( (noInput+1)*noHidden+i ) = (randomDouble(i)*4.8-2.4)/(noHidden+1) + + val initialWeights = Vectors.dense( initialWeightsArr ) + + println( "Parameters:" ) + println( " noInput: "+noInput ) + println( " noHidden: "+noHidden ) + println( " noOutput: "+noOutput ) + println( " noWeights: "+noWeights ) + + run( rdd, initialWeights ) + + } + + def train( rdd: RDD[(Vector,Vector)], model: ParallelANNModel ): ParallelANNModel = { + run( rdd, model.weights ) + } + +} + +/** + * data consists of input vector and output vector, and has the following form: + * + * [ ---input--- ---output---] + * + * where input = data( 0 to noInput-1 ) and output = data( noInput to noInput+noOutput-1 ) + * + * V_ij is the weight from input node i to hidden node j + * W_jk is the weight from hidden node j to output node k + * + * The weights have the following mapping: + * + * V_ij goes to position i + j*(noInput+1) + * W_jk goes to position (noInput+1)*noHidden + j + k*(noHidden+1) + * + * Gradient has same mapping, i.e. + * dE/dVij goes to i + j*(noInput+1) + * dE/dWjk goes to (noInput+1)*noHidden + j +k*(noHidden+1) + * + * Where E = ((estOutput-output),(estOutput-output)), + * the inner product of the difference between estimation and target output with itself. 
+ */ + +class LeastSquaresGradientANN( noInp: Integer, noHid: Integer, noOut: Integer, b: Double ) extends Gradient with ANN { + + val noInput = noInp + val noHidden = noHid + val noOutput = noOut + val beta = b + + override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { + + val brzData = data.toBreeze + val brzInp = DenseVector.vertcat( brzData( 0 to noInput-1 ).toDenseVector, DenseVector[Double](-1.0) ) + + val brzOut = brzData( noInput.toInt to noInput+noOutput-1 ).toVector + val brzWeights = weights.toBreeze + val gradient = DenseVector.zeros[Double]( (noInput+1)*noHidden+(noHidden+1)*noOutput ) + + + val (hidden, output) = computeValues( data, weights ) + var brzHidden = hidden.toBreeze /* already includes the robonode */ + val brzEst = output.toBreeze + val diff = brzEst :- brzOut + val E = diff.dot(diff) + + /* + * The following three fields are for verification only + val eps = .000001 + val noInpCheck = 0 + val noOutCheck = 0 + */ + + var brzWeights_tmp = weights.toBreeze + + /* Wjk */ + for( j <-0 to noHidden ) { + + for( k <-0 to noOutput-1 ) { + + val brzW = brzWeights( noHidden*(noInput+1)+k*(noHidden+1) to noHidden*(noInput+1)+(k+1)*(noHidden+1)-1 ).toVector + var sum_l = brzHidden.dot( brzW ) + gradient( noHidden*(noInput+1)+k*(noHidden+1)+j ) = 2*(diff(k))*dg(sum_l)*brzHidden(j) + + /* + * The following is for verification only + if( noInput==noInpCheck && noOutput==noOutCheck ) + { + brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) = brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) + eps + val est2 = computeValues( data, Vectors.fromBreeze( brzWeights_tmp ) )._2.toBreeze + val diff2 = est2 - brzOut + val d = ( diff2.dot(diff2) - E ) / eps + println( "Calc/Est Wjk: "+ ( gradient( noHidden*(noInput+1)+k*(noHidden+1)+j), d ) ) + brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) = brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) - eps + } + */ + + } + + } + + /* Vij */ + for( i <- 0 to noInput ) { + + for( j <- 0 to noHidden-1 ) { /* the hidden robonode has no associated Vij */ + + for( k<- 0 to noOutput-1 ) { + + val brzW = brzWeights( noHidden*(noInput+1) to noHidden*(noInput+1)+(noHidden+1)-1 ).toVector + val sum_n1 = brzHidden.dot( brzW ) + val brzV = brzWeights( j*(noInput+1) to j*(noInput+1)+(noInput+1)-1 ).toVector + val sum_n2 = brzV.dot( brzInp ) + gradient( i+j*(noInput+1) ) = + gradient( i+j*(noInput+1) ) + 2*(diff(k))*dg( sum_n1 )*brzWeights( noHidden*(noInput+1)+k*(noHidden+1)+j )*dg( sum_n2 )*brzInp( i ) + } + + /* + * The following is for verification only + if( noInput==noInpCheck && noOutput==noOutCheck ) + { + brzWeights_tmp( i+j*(noInput+1) ) = brzWeights_tmp( i+j*(noInput+1) ) + eps + val est2 = computeValues( data, Vectors.fromBreeze( brzWeights_tmp ) )._2.toBreeze + val diff2 = est2 - brzOut + val d = ( diff2.dot( diff2 ) - E ) / eps + println( "Calc/Est Vij: "+ ( gradient( i+j*(noInput+1) ), d ) ) + brzWeights_tmp( i+j*(noInput+1) ) = brzWeights_tmp( i+j*(noInput+1) ) - eps + } + */ + } + } + (Vectors.fromBreeze(gradient), E) + + } + + override def compute( + data: Vector, + label: Double, + weights: Vector, + cumGradient: Vector): Double = { + + val (grad, err) = compute( data, label, weights ) + + cumGradient.toBreeze += grad.toBreeze + + return err + + } +} + +class ANNUpdater extends Updater { + + override def compute( + weightsOld: Vector, + gradient: Vector, + stepSize: Double, + iter: Int, + regParam: Double): (Vector, Double) = { + + val thisIterStepSize = stepSize + + val brzWeights: 
BV[Double] = weightsOld.toBreeze.toDenseVector + + brzAxpy(-thisIterStepSize, gradient.toBreeze, brzWeights) + + (Vectors.fromBreeze(brzWeights), 0) + } + +} + +class ParallelANN ( + + private var stepSize: Double, + private var numIterations: Int, + private var miniBatchFraction: Double, + private var noInput: Int, + private var noHidden: Int, + private var noOutput: Int, + private val beta: Double + + ) extends GeneralizedSteepestDescendAlgorithm[ParallelANNModel] with Serializable { + + private val gradient = new LeastSquaresGradientANN( noInput, noHidden, noOutput, beta ) + private val updater = new SimpleUpdater() + override val optimizer = new GradientDescent(gradient, updater) + .setStepSize(stepSize) + .setNumIterations(numIterations) + .setMiniBatchFraction(miniBatchFraction) + + def this() = { + this( 0.001, 100, 1.0, 1, 5, 1, 1.0 ) + } + + override protected def createModel(weights: Vector) = { + new ParallelANNModel(weights, noInput, noHidden, noOutput, beta) + } + +} From 587474323379775eb8750349040ddb660b6431f7 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 3 Jul 2014 11:34:41 +0800 Subject: [PATCH 002/143] Create GeneralizedSteepestDescendAlgorithm This is the general steepest descend model, with as inputs Vectors and outputs Vectors or Doubles. --- .../ann/GeneralizedSteepestDescendAlgorithm | 165 ++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm b/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm new file mode 100644 index 0000000000000..bc5d9e0e31ec4 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.ann + +import breeze.linalg.{DenseVector => BDV, SparseVector => BSV} +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.Logging +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.optimization._ +import org.apache.spark.mllib.linalg.{Vectors, Vector} +import breeze.linalg.DenseVector +import breeze.linalg.{DenseVector => BDV} +import breeze.linalg.{SparseVector => BSV} + +/** + * :: DeveloperApi :: + * GeneralizedLinearModel (GLM) represents a model trained using + * GeneralizedLinearAlgorithm. GLMs consist of a weight vector and + * an intercept. + * + * @param weights Weights computed for every feature. + * @param intercept Intercept computed for this model. 
+ */ +@DeveloperApi +abstract class GeneralizedSteepestDescendModel(val weights: Vector ) + extends Serializable { + + /** + * Predict the result given a data point and the weights learned. + * + * @param dataMatrix Row vector containing the features for this data point + * @param weightMatrix Column vector containing the weights of the model + * + * If the prediction model consists of a multi-dimensional vector, predictPoint + * returns only the first element of each vector. To get the whole vector, + * use predictPointV instead. + */ + protected def predictPoint( dataMatrix: Vector, weightMatrix: Vector ): Double + + /** + * Predict the result given a data point and the weights learned. + * + * @param dataMatrix Row vector containing the features for this data point + * @param weightMatrix Column vector containing the weights of the model + * + * Returns the complete output vector. + */ + protected def predictPointV( dataMatrix: Vector, weightsMatrix: Vector ): Vector + + /** + * Predict values for the given data set using the model trained. + * + * @param testData RDD representing data points to be predicted + * @return RDD[Double] where each entry contains the corresponding prediction + * + * Returns only first element of output vector. + */ + def predict( testData: RDD[Vector] ): RDD[Double] = { + + val localWeights = weights + testData.map(v => predictPoint(v, localWeights ) ) + + } + + /** + * Predict values for the given data set using the model trained. + * + * @param testData RDD representing data points to be predicted + * @return RDD[Vector] where each entry contains the corresponding prediction + * + * Returns the complete output vector. + */ + def predictV( testData: RDD[Vector] ): RDD[Vector] = { + + val localWeights = weights + testData.map( v => predictPointV( v, localWeights ) ) + + } + + /** + * Predict values for a single data point using the model trained. + * + * @param testData array representing a single data point + * @return Double prediction from the trained model + * + * Returns only first element of output vector. + */ + def predict( testData: Vector ): Double = { + + predictPoint( testData, weights ) + + } + + /** + * Predict values for a single data point using the model trained. + * + * @param testData array representing a single data point + * @return Double prediction from the trained model + * + * Returns the complete vector. + */ + def predictV( testData: Vector ): Vector = { + + predictPointV( testData, weights ) + + } + +} + +/** + * :: DeveloperApi :: + * GeneralizedSteepestDescend implements methods to train a function using + * the Steepest Descend algorithm. + * This class should be extended with an Optimizer to create a new GLM. + */ +@DeveloperApi +abstract class GeneralizedSteepestDescendAlgorithm[M <: GeneralizedSteepestDescendModel] + extends Logging with Serializable { + + /** The optimizer to solve the problem. */ + def optimizer: Optimizer + + /** + * Create a model given the weights + */ + protected def createModel(weights: Vector): M + + /** Prepends one to the input vector. 
*/ + private def prependOne(vector: Vector): Vector = { + val vector1 = vector.toBreeze match { + case dv: BDV[Double] => BDV.vertcat(BDV.ones[Double](1), dv) + case sv: BSV[Double] => BSV.vertcat(new BSV[Double](Array(0), Array(1.0), 1), sv) + case v: Any => throw new IllegalArgumentException("Do not support vector type " + v.getClass) + } + Vectors.fromBreeze(vector1) + } + + /** + * Run the algorithm with the configured parameters on an input RDD + * of LabeledPoint entries starting from the initial weights provided. + */ + def run(input: RDD[(Vector,Vector)], initialWeights: Vector): M = { + + val data = input.map( v => ( (0.0).toDouble, Vectors.fromBreeze( DenseVector.vertcat( v._1.toBreeze.toDenseVector, v._2.toBreeze.toDenseVector ) ) ) ) + val weights = optimizer.optimize(data, initialWeights) + + createModel( weights ) + + } +} From 8c3ff4a553f46628aea096df3ae450dfe36d6f22 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 3 Jul 2014 11:36:31 +0800 Subject: [PATCH 003/143] Create TestParallelANN.scala This is a test program for parallel ANNs. --- .../spark/mllib/ann/TestParallelANN.scala | 259 ++++++++++++++++++ 1 file changed, 259 insertions(+) create mode 100644 mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala new file mode 100644 index 0000000000000..813f138cab096 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala @@ -0,0 +1,259 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package org.apache.spark.mllib.ann + +import org.apache.spark._ +import org.apache.spark.mllib.regression._ +import org.apache.spark.mllib.linalg._ +import org.apache.spark.mllib.ann._ +import scala.util.Random + +object TestParallelANN { + + var rand = new Random + + def generateInput2D( f: Double => Double, xmin: Double, xmax: Double, noPoints: Int ): Array[(Vector,Vector)] = { + + var out = new Array[(Vector,Vector)](noPoints) + + for( i <- 0 to noPoints-1 ) { + val x = xmin+rand.nextDouble()*(xmax-xmin) + val y = f(x) + out(i) = ( Vectors.dense( x ), Vectors.dense( y ) ) + } + + return out + + } + + + def generateInput3D( f: (Double,Double) => Double, xmin: Double, xmax: Double, ymin: Double, ymax: Double, noPoints: Int ): Array[(Vector,Vector)] = { + + var out = new Array[(Vector,Vector)](noPoints) + + for( i <- 0 to noPoints-1 ) { + val x = xmin+rand.nextDouble()*(xmax-xmin) + val y = ymin+rand.nextDouble()*(ymax-ymin) + val z = f( x, y ) + var arr = new Array[Double](2) + arr(0) = x + arr(1) = y + out(i) = ( Vectors.dense( arr ), Vectors.dense( z ) ) + } + + out + + } + + def generateInput4D( f: Double => (Double,Double,Double), tmin: Double, tmax: Double, noPoints: Int ): Array[(Vector,Vector)] = { + + var out = new Array[(Vector,Vector)](noPoints) + + for( i <- 0 to noPoints-1 ) { + + val t: Double = tmin+rand.nextDouble()*(tmax-tmin) + + var arr = new Array[Double](3) + + var F = f(t) + + arr(0) = F._1 + arr(1) = F._2 + arr(2) = F._3 + + out(i) = ( Vectors.dense( t ), Vectors.dense( arr ) ) + } + + out + + } + + def f( T: Double ): Double = { + val y = 0.5+Math.abs(T/5).toInt.toDouble*.15+math.sin(T*math.Pi/10)*.1 + assert( y<= 1) + y + } + + def f3D( x: Double, y: Double ): Double = { + .5+.24*Math.sin( x*2*math.Pi/10 ) + .24*Math.cos( y*2*math.Pi/10 ) + } + + def f4D( t: Double ): (Double, Double,Double) = { + val x = Math.abs(.8*Math.cos( t*2*math.Pi/20 ) )+.1 + val y = (11+t)/22 + val z = .5+.35*Math.sin(t*2*math.Pi/5)*Math.cos( t*2*math.Pi/10 ) + .15*t/11 + ( x, y, z ) + } + + def concat( v1: Vector, v2: Vector ): Vector = { + + var a1 = v1.toArray + var a2 = v2.toArray + var a3 = new Array[Double]( a1.size + a2.size ) + + for( i <- 0 to a1.size-1 ) { + a3(i) = a1(i) + } + + for( i<-0 to a2.size-1 ) { + a3(i+a1.size) = a2(i) + } + + Vectors.dense( a3 ) + + } + + def main( arg: Array[String] ) { + + println( "Parallel ANN tester" ) + + var curAngle: Double = 0.0 + var graphic: Boolean = false + + if( (arg.length>0) && (arg(0)=="graph" ) ) { + graphic = true + } + + var outputFrame2D: OutputFrame2D = null + var outputFrame3D: OutputFrame3D = null + var outputFrame4D: OutputFrame3D = null + + if( graphic ) { + + outputFrame2D = new OutputFrame2D( "x -> y" ) + outputFrame2D.apply + + outputFrame3D = new OutputFrame3D( "(x,y) -> z", 1 ) + outputFrame3D.apply + + outputFrame4D = new OutputFrame3D( "t -> (x,y,z)" ) + outputFrame4D.apply + + } + + var A = 20.0 + var B = 50.0 + + var conf = new SparkConf().setAppName("Parallel ANN").setMaster("local[5]") + var sc = new SparkContext(conf) + + val testRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache() + val testRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache + val testRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache + + if( graphic ) { + outputFrame2D.setData( testRDD2D.map( T => concat( T._1, T._2 ) ) ) + outputFrame3D.setData( testRDD3D.map( T => concat( T._1, T._2 ) ) ) + outputFrame4D.setData( testRDD4D.map( T => T._2 
) ) + } + + val parallelANN2D = new ParallelANNWithSGD( 1, 10 ) + parallelANN2D.optimizer.setNumIterations(1000).setStepSize( 1.0 ) + + val parallelANN3D = new ParallelANNWithSGD( 2, 20 ) + parallelANN3D.optimizer.setNumIterations(1000).setStepSize( 1.0 ) + + val parallelANN4D = new ParallelANNWithSGD( 1, 20, 3 ) + parallelANN4D.optimizer.setNumIterations( 1000 ).setStepSize( 1.0 ) + + var model2D = parallelANN2D.train( testRDD2D ) + var model3D = parallelANN3D.train( testRDD3D ) + var model4D = parallelANN4D.train( testRDD4D ) + + val noIt = 100 + var errHist = new Array[(Int,Double,Double,Double)]( noIt ) + + val validationRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache() + val validationRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache + val validationRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache + + for( i <- 0 to noIt-1 ) { + + val predictedAndTarget2D = validationRDD2D.map( T => ( T._1, T._2, model2D.predictV( T._1 ) ) ) + val predictedAndTarget3D = validationRDD3D.map( T => ( T._1, T._2, model3D.predictV( T._1 ) ) ) + val predictedAndTarget4D = validationRDD4D.map( T => ( T._1, T._2, model4D.predictV( T._1 ) ) ) + + var err2D = predictedAndTarget2D.map( T => + (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) + ).reduce( (u,v) => u+v ) + + var err3D = predictedAndTarget3D.map( T => + (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) + ).reduce( (u,v) => u+v ) + + var err4D = predictedAndTarget4D.map( T => { + + val v1 = T._2.toArray + val v2 = T._3.toArray + + (v1(0) - v2(0))*(v1(0) - v2(0))+ + (v1(1) - v2(1))*(v1(1) - v2(1))+ + (v1(2) - v2(2))*(v1(2) - v2(2)) + + } ).reduce( (u,v) => u+v ) + + + if( graphic ) { + + val predicted2D = predictedAndTarget2D.map( + T => concat( T._1, T._3 ) + ) + + val predicted3D = predictedAndTarget3D.map( + T => concat( T._1, T._3 ) + ) + + val predicted4D = predictedAndTarget4D.map( + T => T._3 + ) + + curAngle = curAngle + math.Pi/4 + if( curAngle>=2*math.Pi ) { + curAngle = curAngle-2*math.Pi + } + + outputFrame3D.setAngle( curAngle ) + outputFrame4D.setAngle( curAngle ) + + outputFrame2D.setApproxPoints( predicted2D ) + outputFrame3D.setApproxPoints( predicted3D ) + outputFrame4D.setApproxPoints( predicted4D ) + + } + + println( "Error 2D/3D/4D: "+(err2D, err3D, err4D) ) + errHist(i) = ( i, err2D, err3D, err4D ) + + if( i Date: Thu, 3 Jul 2014 11:42:29 +0800 Subject: [PATCH 004/143] Create TestParallelANNgraphics.scala Visualisation tools; only used when "TestParallelANN" is given the "graph" parameter. --- .../mllib/ann/TestParallelANNgraphics.scala | 310 ++++++++++++++++++ 1 file changed, 310 insertions(+) create mode 100644 mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala new file mode 100644 index 0000000000000..1d3ac8e4b2486 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala @@ -0,0 +1,310 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.ann + +import java.awt._ +import java.awt.event._ +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.linalg.Vector +import scala.Array.canBuildFrom + +object windowAdapter extends WindowAdapter { + + override def windowClosing( e: WindowEvent ) { + System.exit(0) + } + +} + +class OutputCanvas2D( wd: Int, ht: Int ) extends Canvas { + + var points: Array[Vector] = null + var approxPoints: Array[Vector] = null + + /* input: rdd of (x,y) vectors */ + def setData( rdd: RDD[Vector] ) { + points = rdd.toArray + repaint + } + + def setApproxPoints( rdd: RDD[Vector] ) { + approxPoints = rdd.toArray + repaint + } + + def plotDot( g: Graphics, x: Int, y: Int ) { + val r = 5 + val noSamp = 6*r + var x1 = x + var y1 = y+r + for( j<-1 to noSamp ) { + val x2 = (x.toDouble+math.sin( j.toDouble*2*math.Pi/noSamp )*r+.5).toInt + val y2 = (y.toDouble+math.cos( j.toDouble*2*math.Pi/noSamp )*r+.5).toInt + g.drawLine( x1, ht-y1, x2, ht-y2 ) + x1 = x2 + y1 = y2 + } + } + + override def paint( g: Graphics) = { + + var xmax: Double = 0.0 + var xmin: Double = 0.0 + var ymax: Double = 0.0 + var ymin: Double = 0.0 + + if( points!=null ) { + + g.setColor( Color.black ) + val x = points.map( T => (T.toArray)(0) ) + val y = points.map( T => (T.toArray)(1) ) + + xmax = x.max + xmin = x.min + ymax = y.max + ymin = y.min + + for( i <- 0 to x.size-1 ) { + + val xr = (((x(i).toDouble-xmin)/(xmax-xmin))*wd+.5).toInt + val yr = (((y(i).toDouble-ymin)/(ymax-ymin))*ht+.5).toInt + plotDot( g, xr, yr ) + + } + + if( approxPoints != null ) { + + g.setColor( Color.red ) + val x = approxPoints.map( T => (T.toArray)(0) ) + val y = approxPoints.map( T => (T.toArray)(1) ) + + for( i <- 0 to x.size-1 ) { + val xr = (((x(i).toDouble-xmin)/(xmax-xmin))*wd+.5).toInt + val yr = (((y(i).toDouble-ymin)/(ymax-ymin))*ht+.5).toInt + plotDot( g, xr, yr ) + } + + } + + } + + } +} + +class OutputFrame2D( title: String ) extends Frame( title ) { + + val wd = 800 + val ht = 600 + + var outputCanvas = new OutputCanvas2D( wd, ht ) + + def apply() { + addWindowListener( windowAdapter ) + setSize( wd, ht ) + add( "Center", outputCanvas ) + show() + } + + def setData( rdd: RDD[Vector] ) { + outputCanvas.setData( rdd ) + } + + def setApproxPoints( rdd: RDD[Vector] ) { + outputCanvas.setApproxPoints( rdd ) + } + + +} + +object windowAdapter3D extends WindowAdapter { + + override def windowClosing( e: WindowEvent ) { + System.exit(0) + } + +} + +class OutputCanvas3D( wd: Int, ht: Int, shadowFrac: Double ) extends Canvas { + + var angle: Double = 0 + var points: Array[Vector] = null + var approxPoints: Array[Vector] = null + + /* 3 dimensional (x,y,z) vector */ + def setData( rdd: RDD[Vector] ) { + points = rdd.toArray + repaint + } + + def setApproxPoints( rdd: RDD[Vector] ) { + approxPoints = rdd.toArray + repaint + } + + def plotDot( g: Graphics, x: Int, y: Int ) { + val r = 5 + val noSamp = 6*r + var x1 = x + var y1 = y+r + for( 
j<-1 to noSamp ) { + val x2 = (x.toDouble+math.sin( j.toDouble*2*math.Pi/noSamp )*r+.5).toInt + val y2 = (y.toDouble+math.cos( j.toDouble*2*math.Pi/noSamp )*r+.5).toInt + g.drawLine( x1, ht-y1, x2, ht-y2 ) + x1 = x2 + y1 = y2 + } + } + + def plotLine( g: Graphics, x1: Int, y1: Int, x2: Int, y2: Int ) { + g.drawLine( x1, ht-y1, x2, ht-y2 ) + } + + def calcCord( arr: Array[Double], angle: Double ): (Double, Double, Double, Double, Double, Double) = { + + var arrOut = new Array[Double](6) + + val x = arr(0)*math.cos( angle ) - arr(1)*math.sin( angle ) + val y = arr(0)*math.sin( angle ) + arr(1)*math.cos( angle ) + val z = arr(2) + + val x0 = arr(0)*math.cos( angle ) - arr(1)*math.sin( angle ) + val y0 = arr(0)*math.sin( angle ) + arr(1)*math.cos( angle ) + val z0 = 0 + + val xs = (arr(0)+shadowFrac*arr(2))*math.cos( angle ) - arr(1)*math.sin( angle ) + val ys = (arr(0)+shadowFrac*arr(2))*math.sin( angle ) + arr(1)*math.cos( angle ) + val zs = 0 + + arrOut(0) = y-.5*x + arrOut(1) = z-.25*x + + arrOut(2) = y0-.5*x0 + arrOut(3) = z0-.25*x0 + + arrOut(4) = ys-.5*xs + arrOut(5) = zs-.25*xs + + ( arrOut(0), arrOut(1), arrOut(2), arrOut(3), arrOut(4), arrOut(5) ) + + } + + override def paint( g: Graphics) = { + + if( points!=null ) { + + var p = points.map( T => calcCord( T.toArray, angle ) ).toArray + + var xmax = p(0)._1 + var xmin = p(0)._1 + var ymax = p(0)._2 + var ymin = p(0)._2 + + for( i <-0 to p.size-1 ) { + + if( xmaxp(i)._1 ) xmin = p(i)._1 + if( xmin>p(i)._3 ) xmin = p(i)._3 + if( xmin>p(i)._5 ) xmin = p(i)._5 + + if( ymaxp(i)._2 ) ymin = p(i)._2 + if( ymin>p(i)._4 ) ymin = p(i)._4 + if( ymin>p(i)._6 ) ymin = p(i)._6 + + } + + for( i <- 0 to p.size-1 ) { + + var x_ = (((p(i)._1-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt + var y_ = (((p(i)._2-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt + var x0 = (((p(i)._3-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt + var y0 = (((p(i)._4-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt + var xs = (((p(i)._5-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt + var ys = (((p(i)._6-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt + + g.setColor( Color.black ) + + plotDot( g, x_, y_ ) + plotLine( g, x_, y_, x0, y0 ) + g.setColor( Color.gray ) + plotLine( g, x0, y0, xs, ys ) + + } + + if( approxPoints != null ) { + + var p = approxPoints.map( T => calcCord( T.toArray, angle ) ) + + for( i <- 0 to p.size-1 ) { + + var x_ = (((p(i)._1-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt + var y_ = (((p(i)._2-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt + var x0 = (((p(i)._3-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt + var y0 = (((p(i)._4-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt + var xs = (((p(i)._5-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt + var ys = (((p(i)._6-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt + + g.setColor( Color.red ) + plotDot( g, x_, y_ ) + plotLine( g, x_, y_, x0, y0 ) + g.setColor( Color.magenta ) + plotLine( g, x0, y0, xs, ys ) + + } + + } + + } + } +} + +class OutputFrame3D( title: String, shadowFrac: Double ) extends Frame( title ) { + + val wd = 800 + val ht = 600 + + def this( title: String ) = this( title, .25 ) + + var outputCanvas = new OutputCanvas3D( wd, ht, shadowFrac ) + + def apply() { + addWindowListener( windowAdapter3D ) + setSize( wd, ht ) + add( "Center", outputCanvas ) + show() + } + + def setData( rdd: RDD[Vector] ) { + outputCanvas.setData( rdd ) + } + + def setAngle( angle: Double ) { + outputCanvas.angle = angle + } + + def setApproxPoints( rdd: RDD[Vector] ) { + outputCanvas.setApproxPoints( rdd ) + } + +} From 69b0e59ca3d29e783848a6b164c35b7f3c94ed49 Mon Sep 17 00:00:00 2001 From: Bert 
Greevenbosch Date: Thu, 3 Jul 2014 17:15:29 +0800 Subject: [PATCH 005/143] Update TestParallelANN.scala Due to TAB characters, some of the indent was messed up. This fixes it. --- .../spark/mllib/ann/TestParallelANN.scala | 138 +++++++++--------- 1 file changed, 69 insertions(+), 69 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala index 813f138cab096..3b56da115701e 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala @@ -30,57 +30,57 @@ object TestParallelANN { def generateInput2D( f: Double => Double, xmin: Double, xmax: Double, noPoints: Int ): Array[(Vector,Vector)] = { - var out = new Array[(Vector,Vector)](noPoints) + var out = new Array[(Vector,Vector)](noPoints) - for( i <- 0 to noPoints-1 ) { - val x = xmin+rand.nextDouble()*(xmax-xmin) - val y = f(x) - out(i) = ( Vectors.dense( x ), Vectors.dense( y ) ) - } + for( i <- 0 to noPoints-1 ) { + val x = xmin+rand.nextDouble()*(xmax-xmin) + val y = f(x) + out(i) = ( Vectors.dense( x ), Vectors.dense( y ) ) + } - return out + return out } def generateInput3D( f: (Double,Double) => Double, xmin: Double, xmax: Double, ymin: Double, ymax: Double, noPoints: Int ): Array[(Vector,Vector)] = { - var out = new Array[(Vector,Vector)](noPoints) + var out = new Array[(Vector,Vector)](noPoints) - for( i <- 0 to noPoints-1 ) { - val x = xmin+rand.nextDouble()*(xmax-xmin) - val y = ymin+rand.nextDouble()*(ymax-ymin) - val z = f( x, y ) - var arr = new Array[Double](2) - arr(0) = x - arr(1) = y - out(i) = ( Vectors.dense( arr ), Vectors.dense( z ) ) - } + for( i <- 0 to noPoints-1 ) { + val x = xmin+rand.nextDouble()*(xmax-xmin) + val y = ymin+rand.nextDouble()*(ymax-ymin) + val z = f( x, y ) + var arr = new Array[Double](2) + arr(0) = x + arr(1) = y + out(i) = ( Vectors.dense( arr ), Vectors.dense( z ) ) + } - out + out } def generateInput4D( f: Double => (Double,Double,Double), tmin: Double, tmax: Double, noPoints: Int ): Array[(Vector,Vector)] = { - var out = new Array[(Vector,Vector)](noPoints) + var out = new Array[(Vector,Vector)](noPoints) - for( i <- 0 to noPoints-1 ) { + for( i <- 0 to noPoints-1 ) { - val t: Double = tmin+rand.nextDouble()*(tmax-tmin) + val t: Double = tmin+rand.nextDouble()*(tmax-tmin) - var arr = new Array[Double](3) + var arr = new Array[Double](3) - var F = f(t) + var F = f(t) - arr(0) = F._1 - arr(1) = F._2 - arr(2) = F._3 + arr(0) = F._1 + arr(1) = F._2 + arr(2) = F._3 - out(i) = ( Vectors.dense( t ), Vectors.dense( arr ) ) - } + out(i) = ( Vectors.dense( t ), Vectors.dense( arr ) ) + } - out + out } @@ -186,65 +186,65 @@ object TestParallelANN { for( i <- 0 to noIt-1 ) { val predictedAndTarget2D = validationRDD2D.map( T => ( T._1, T._2, model2D.predictV( T._1 ) ) ) - val predictedAndTarget3D = validationRDD3D.map( T => ( T._1, T._2, model3D.predictV( T._1 ) ) ) - val predictedAndTarget4D = validationRDD4D.map( T => ( T._1, T._2, model4D.predictV( T._1 ) ) ) + val predictedAndTarget3D = validationRDD3D.map( T => ( T._1, T._2, model3D.predictV( T._1 ) ) ) + val predictedAndTarget4D = validationRDD4D.map( T => ( T._1, T._2, model4D.predictV( T._1 ) ) ) - var err2D = predictedAndTarget2D.map( T => - (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) - ).reduce( (u,v) => u+v ) + var err2D = predictedAndTarget2D.map( T => + (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - 
T._2.toArray(0)) + ).reduce( (u,v) => u+v ) - var err3D = predictedAndTarget3D.map( T => - (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) - ).reduce( (u,v) => u+v ) + var err3D = predictedAndTarget3D.map( T => + (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) + ).reduce( (u,v) => u+v ) - var err4D = predictedAndTarget4D.map( T => { + var err4D = predictedAndTarget4D.map( T => { - val v1 = T._2.toArray - val v2 = T._3.toArray + val v1 = T._2.toArray + val v2 = T._3.toArray - (v1(0) - v2(0))*(v1(0) - v2(0))+ - (v1(1) - v2(1))*(v1(1) - v2(1))+ - (v1(2) - v2(2))*(v1(2) - v2(2)) + (v1(0) - v2(0))*(v1(0) - v2(0))+ + (v1(1) - v2(1))*(v1(1) - v2(1))+ + (v1(2) - v2(2))*(v1(2) - v2(2)) - } ).reduce( (u,v) => u+v ) + } ).reduce( (u,v) => u+v ) - if( graphic ) { + if( graphic ) { val predicted2D = predictedAndTarget2D.map( - T => concat( T._1, T._3 ) - ) + T => concat( T._1, T._3 ) + ) - val predicted3D = predictedAndTarget3D.map( - T => concat( T._1, T._3 ) - ) + val predicted3D = predictedAndTarget3D.map( + T => concat( T._1, T._3 ) + ) - val predicted4D = predictedAndTarget4D.map( - T => T._3 - ) + val predicted4D = predictedAndTarget4D.map( + T => T._3 + ) curAngle = curAngle + math.Pi/4 - if( curAngle>=2*math.Pi ) { - curAngle = curAngle-2*math.Pi - } + if( curAngle>=2*math.Pi ) { + curAngle = curAngle-2*math.Pi + } - outputFrame3D.setAngle( curAngle ) - outputFrame4D.setAngle( curAngle ) + outputFrame3D.setAngle( curAngle ) + outputFrame4D.setAngle( curAngle ) - outputFrame2D.setApproxPoints( predicted2D ) - outputFrame3D.setApproxPoints( predicted3D ) - outputFrame4D.setApproxPoints( predicted4D ) + outputFrame2D.setApproxPoints( predicted2D ) + outputFrame3D.setApproxPoints( predicted3D ) + outputFrame4D.setApproxPoints( predicted4D ) - } + } - println( "Error 2D/3D/4D: "+(err2D, err3D, err4D) ) - errHist(i) = ( i, err2D, err3D, err4D ) + println( "Error 2D/3D/4D: "+(err2D, err3D, err4D) ) + errHist(i) = ( i, err2D, err3D, err4D ) - if( i Date: Thu, 3 Jul 2014 17:16:59 +0800 Subject: [PATCH 006/143] Update TestParallelANN.scala Fixed some issues with indent. --- .../org/apache/spark/mllib/ann/TestParallelANN.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala index 3b56da115701e..f821f87ebd937 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala @@ -136,13 +136,13 @@ object TestParallelANN { if( graphic ) { - outputFrame2D = new OutputFrame2D( "x -> y" ) - outputFrame2D.apply + outputFrame2D = new OutputFrame2D( "x -> y" ) + outputFrame2D.apply - outputFrame3D = new OutputFrame3D( "(x,y) -> z", 1 ) - outputFrame3D.apply + outputFrame3D = new OutputFrame3D( "(x,y) -> z", 1 ) + outputFrame3D.apply - outputFrame4D = new OutputFrame3D( "t -> (x,y,z)" ) + outputFrame4D = new OutputFrame3D( "t -> (x,y,z)" ) outputFrame4D.apply } From b1972b1a602f6ccdede1ab28df7ef059bb482654 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 3 Jul 2014 17:21:40 +0800 Subject: [PATCH 007/143] Update TestParallelANNgraphics.scala Due to TAB characters, the indent was mixed up. This fixes it. 
--- .../mllib/ann/TestParallelANNgraphics.scala | 156 +++++++++--------- 1 file changed, 78 insertions(+), 78 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala index 1d3ac8e4b2486..4a7a9a712b549 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala @@ -63,45 +63,45 @@ class OutputCanvas2D( wd: Int, ht: Int ) extends Canvas { override def paint( g: Graphics) = { - var xmax: Double = 0.0 - var xmin: Double = 0.0 - var ymax: Double = 0.0 - var ymin: Double = 0.0 + var xmax: Double = 0.0 + var xmin: Double = 0.0 + var ymax: Double = 0.0 + var ymin: Double = 0.0 - if( points!=null ) { + if( points!=null ) { - g.setColor( Color.black ) - val x = points.map( T => (T.toArray)(0) ) - val y = points.map( T => (T.toArray)(1) ) + g.setColor( Color.black ) + val x = points.map( T => (T.toArray)(0) ) + val y = points.map( T => (T.toArray)(1) ) - xmax = x.max - xmin = x.min - ymax = y.max - ymin = y.min + xmax = x.max + xmin = x.min + ymax = y.max + ymin = y.min - for( i <- 0 to x.size-1 ) { + for( i <- 0 to x.size-1 ) { - val xr = (((x(i).toDouble-xmin)/(xmax-xmin))*wd+.5).toInt - val yr = (((y(i).toDouble-ymin)/(ymax-ymin))*ht+.5).toInt - plotDot( g, xr, yr ) + val xr = (((x(i).toDouble-xmin)/(xmax-xmin))*wd+.5).toInt + val yr = (((y(i).toDouble-ymin)/(ymax-ymin))*ht+.5).toInt + plotDot( g, xr, yr ) - } + } if( approxPoints != null ) { - g.setColor( Color.red ) - val x = approxPoints.map( T => (T.toArray)(0) ) - val y = approxPoints.map( T => (T.toArray)(1) ) + g.setColor( Color.red ) + val x = approxPoints.map( T => (T.toArray)(0) ) + val y = approxPoints.map( T => (T.toArray)(1) ) - for( i <- 0 to x.size-1 ) { - val xr = (((x(i).toDouble-xmin)/(xmax-xmin))*wd+.5).toInt - val yr = (((y(i).toDouble-ymin)/(ymax-ymin))*ht+.5).toInt - plotDot( g, xr, yr ) - } + for( i <- 0 to x.size-1 ) { + val xr = (((x(i).toDouble-xmin)/(xmax-xmin))*wd+.5).toInt + val yr = (((y(i).toDouble-ymin)/(ymax-ymin))*ht+.5).toInt + plotDot( g, xr, yr ) + } - } + } - } + } } } @@ -205,77 +205,77 @@ class OutputCanvas3D( wd: Int, ht: Int, shadowFrac: Double ) extends Canvas { override def paint( g: Graphics) = { - if( points!=null ) { + if( points!=null ) { - var p = points.map( T => calcCord( T.toArray, angle ) ).toArray + var p = points.map( T => calcCord( T.toArray, angle ) ).toArray - var xmax = p(0)._1 - var xmin = p(0)._1 - var ymax = p(0)._2 - var ymin = p(0)._2 + var xmax = p(0)._1 + var xmin = p(0)._1 + var ymax = p(0)._2 + var ymin = p(0)._2 - for( i <-0 to p.size-1 ) { + for( i <-0 to p.size-1 ) { - if( xmaxp(i)._1 ) xmin = p(i)._1 - if( xmin>p(i)._3 ) xmin = p(i)._3 - if( xmin>p(i)._5 ) xmin = p(i)._5 + if( xmin>p(i)._1 ) xmin = p(i)._1 + if( xmin>p(i)._3 ) xmin = p(i)._3 + if( xmin>p(i)._5 ) xmin = p(i)._5 - if( ymaxp(i)._2 ) ymin = p(i)._2 - if( ymin>p(i)._4 ) ymin = p(i)._4 - if( ymin>p(i)._6 ) ymin = p(i)._6 + if( ymin>p(i)._2 ) ymin = p(i)._2 + if( ymin>p(i)._4 ) ymin = p(i)._4 + if( ymin>p(i)._6 ) ymin = p(i)._6 - } + } - for( i <- 0 to p.size-1 ) { + for( i <- 0 to p.size-1 ) { - var x_ = (((p(i)._1-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt - var y_ = (((p(i)._2-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt - var x0 = (((p(i)._3-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt - var y0 = (((p(i)._4-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt - var xs = 
(((p(i)._5-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt - var ys = (((p(i)._6-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt + var x_ = (((p(i)._1-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt + var y_ = (((p(i)._2-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt + var x0 = (((p(i)._3-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt + var y0 = (((p(i)._4-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt + var xs = (((p(i)._5-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt + var ys = (((p(i)._6-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt - g.setColor( Color.black ) + g.setColor( Color.black ) - plotDot( g, x_, y_ ) - plotLine( g, x_, y_, x0, y0 ) - g.setColor( Color.gray ) - plotLine( g, x0, y0, xs, ys ) - - } + plotDot( g, x_, y_ ) + plotLine( g, x_, y_, x0, y0 ) + g.setColor( Color.gray ) + plotLine( g, x0, y0, xs, ys ) + + } - if( approxPoints != null ) { + if( approxPoints != null ) { - var p = approxPoints.map( T => calcCord( T.toArray, angle ) ) + var p = approxPoints.map( T => calcCord( T.toArray, angle ) ) - for( i <- 0 to p.size-1 ) { + for( i <- 0 to p.size-1 ) { - var x_ = (((p(i)._1-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt - var y_ = (((p(i)._2-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt - var x0 = (((p(i)._3-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt - var y0 = (((p(i)._4-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt - var xs = (((p(i)._5-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt - var ys = (((p(i)._6-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt + var x_ = (((p(i)._1-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt + var y_ = (((p(i)._2-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt + var x0 = (((p(i)._3-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt + var y0 = (((p(i)._4-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt + var xs = (((p(i)._5-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt + var ys = (((p(i)._6-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt - g.setColor( Color.red ) - plotDot( g, x_, y_ ) - plotLine( g, x_, y_, x0, y0 ) - g.setColor( Color.magenta ) - plotLine( g, x0, y0, xs, ys ) + g.setColor( Color.red ) + plotDot( g, x_, y_ ) + plotLine( g, x_, y_, x0, y0 ) + g.setColor( Color.magenta ) + plotLine( g, x0, y0, xs, ys ) - } + } - } + } - } + } } } From dd796157fa30b2255c76721d6d1b6e40c6254f16 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Wed, 30 Jul 2014 11:06:52 +0800 Subject: [PATCH 008/143] Update GeneralizedSteepestDescendAlgorithm Updated to agree with "sbt/sbt scalastyle" --- .../ann/GeneralizedSteepestDescendAlgorithm | 59 ++++++++++--------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm b/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm index bc5d9e0e31ec4..7c436d1067a12 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm @@ -45,21 +45,21 @@ abstract class GeneralizedSteepestDescendModel(val weights: Vector ) * * @param dataMatrix Row vector containing the features for this data point * @param weightMatrix Column vector containing the weights of the model - * + * * If the prediction model consists of a multi-dimensional vector, predictPoint - * returns only the first element of each vector. To get the whole vector, + * returns only the first element of each vector. To get the whole vector, * use predictPointV instead. */ protected def predictPoint( dataMatrix: Vector, weightMatrix: Vector ): Double - + /** * Predict the result given a data point and the weights learned. 
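   * For example, for a network trained with noOutput = 3, predictPointV
   * returns all three output activations, whereas predictPoint returns
   * only element 0 of that same vector.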
* * @param dataMatrix Row vector containing the features for this data point * @param weightMatrix Column vector containing the weights of the model - * + * * Returns the complete output vector. - */ + */ protected def predictPointV( dataMatrix: Vector, weightsMatrix: Vector ): Vector /** @@ -67,57 +67,57 @@ abstract class GeneralizedSteepestDescendModel(val weights: Vector ) * * @param testData RDD representing data points to be predicted * @return RDD[Double] where each entry contains the corresponding prediction - * + * * Returns only first element of output vector. */ def predict( testData: RDD[Vector] ): RDD[Double] = { - - val localWeights = weights + + val localWeights = weights testData.map(v => predictPoint(v, localWeights ) ) - + } - + /** * Predict values for the given data set using the model trained. * * @param testData RDD representing data points to be predicted * @return RDD[Vector] where each entry contains the corresponding prediction - * + * * Returns the complete output vector. - */ + */ def predictV( testData: RDD[Vector] ): RDD[Vector] = { - + val localWeights = weights testData.map( v => predictPointV( v, localWeights ) ) - + } - + /** * Predict values for a single data point using the model trained. * * @param testData array representing a single data point * @return Double prediction from the trained model - * + * * Returns only first element of output vector. */ def predict( testData: Vector ): Double = { - + predictPoint( testData, weights ) - + } - + /** * Predict values for a single data point using the model trained. * * @param testData array representing a single data point * @return Double prediction from the trained model - * + * * Returns the complete vector. */ def predictV( testData: Vector ): Vector = { - + predictPointV( testData, weights ) - + } } @@ -139,7 +139,7 @@ abstract class GeneralizedSteepestDescendAlgorithm[M <: GeneralizedSteepestDesce * Create a model given the weights */ protected def createModel(weights: Vector): M - + /** Prepends one to the input vector. */ private def prependOne(vector: Vector): Vector = { val vector1 = vector.toBreeze match { @@ -154,12 +154,17 @@ abstract class GeneralizedSteepestDescendAlgorithm[M <: GeneralizedSteepestDesce * Run the algorithm with the configured parameters on an input RDD * of LabeledPoint entries starting from the initial weights provided. 
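   *
   * Each (input, target) pair is packed into the single labeled vector the
   * optimizer expects; e.g. ( Vectors.dense(x1, x2), Vectors.dense(y) )
   * becomes ( 0.0, Vectors.dense(x1, x2, y) ), where the 0.0 label is unused
   * by the ANN gradient.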
*/ - def run(input: RDD[(Vector,Vector)], initialWeights: Vector): M = { - - val data = input.map( v => ( (0.0).toDouble, Vectors.fromBreeze( DenseVector.vertcat( v._1.toBreeze.toDenseVector, v._2.toBreeze.toDenseVector ) ) ) ) + def run(input: RDD[(Vector,Vector)], initialWeights: Vector): M = { + + val data = input.map( v => ( + (0.0).toDouble, + Vectors.fromBreeze( DenseVector.vertcat( + v._1.toBreeze.toDenseVector, + v._2.toBreeze.toDenseVector ) ) + ) ) val weights = optimizer.optimize(data, initialWeights) createModel( weights ) - + } } From 1f6de6a9c393920175a1ef47571eae9e8eb318de Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Wed, 30 Jul 2014 11:07:50 +0800 Subject: [PATCH 009/143] Update ParallelANN.scala Updated to agree with "sbt/sbt scalastyle" --- .../apache/spark/mllib/ann/ParallelANN.scala | 303 +++++++++--------- 1 file changed, 151 insertions(+), 152 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala index f0668d77895d2..f43fbb4448fa7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala @@ -29,10 +29,11 @@ import breeze.linalg.{Vector => BV} import breeze.linalg.{axpy => brzAxpy} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.mllib.regression.RegressionModel +import org.apache.spark.util.random.XORShiftRandom /* * Implements a Artificial Neural Network (ANN) - * + * * format of data: * data[ 0..noInput-1 ]: Input * data[ noInput..noInput+noOutput-1 ]: Output @@ -40,89 +41,82 @@ import org.apache.spark.mllib.regression.RegressionModel */ trait ANN { - + def noInput: Integer def noHidden: Integer def noOutput: Integer def beta: Double - - def g( x: Double ) = (1/(1+math.exp(-beta*x))) - def dg( x: Double ) = beta*g(x)*(1-g(x)) + def g( x: Double ) = (1/(1 + math.exp(-beta*x))) + def dg( x: Double ) = beta*g(x)*(1 - g(x)) /* returns the hidden layer including the -1 robonode! 
*/ def computeHidden( data: Vector, weights: Vector ): Vector = { - + val brzData = data.toBreeze - val brzInp = DenseVector.vertcat( brzData( 0 to noInput-1 ).toDenseVector, DenseVector[Double](-1.0) ) + val brzInp = DenseVector.vertcat( + brzData( 0 to noInput - 1 ).toDenseVector, DenseVector[Double](-1.0) ) val brzWeights = weights.toBreeze - var hidden = DenseVector.zeros[Double]( noHidden+1 ) - + var hidden = DenseVector.zeros[Double]( noHidden + 1 ) + for( j <- 0 to noHidden-1 ) { - - val weightsSubset = brzWeights( j*(noInput+1) to j*(noInput+1)+(noInput+1)-1 ).toVector + + val weightsSubset = brzWeights( + j*(noInput + 1) to j*(noInput + 1) + (noInput + 1) - 1 ).toVector hidden( j ) = g( weightsSubset.dot( brzInp ) ) - + } - + hidden( noHidden ) = -1.0 - + Vectors.fromBreeze( hidden ) - + } - + /* returns the hidden layer including the -1 robonode, as well as the final estimation */ def computeValues( data: Vector, weights: Vector ): (Vector, Vector) = { - + var hidden = computeHidden( data, weights ) var output = new Array[Double](noOutput) - - for( k<-0 to noOutput-1 ) { + + for( k <- 0 to noOutput - 1 ) { val brzWeights = weights.toBreeze - var weightsSubset = brzWeights( noHidden*(noInput+1)+k*(noHidden+1) to noHidden*(noInput+1)+(k+1)*(noHidden+1)-1).toVector + var weightsSubset = brzWeights( noHidden*(noInput + 1) + k*(noHidden + 1) to + noHidden*(noInput + 1) + (k + 1)*(noHidden + 1) - 1).toVector output(k) = g( weightsSubset.dot( hidden.toBreeze ) ) } - + ( hidden, Vectors.dense( output ) ) - - } - + + } + } -class ParallelANNModel private[mllib] +class ParallelANNModel private[mllib] ( override val weights: Vector, val noInp: Integer, val noHid: Integer, val noOut: Integer, val b: Double ) - extends GeneralizedSteepestDescendModel(weights) with RegressionModel with Serializable with ANN { - + extends GeneralizedSteepestDescendModel(weights) with RegressionModel with Serializable with ANN { + val noInput = noInp val noHidden = noHid val noOutput = noOut val beta = b - + override def predictPoint( data: Vector, weights: Vector ): Double = { val outp = computeValues( data, weights )._2 outp.toArray(0) } - + def predictPointV( data: Vector, weights: Vector): Vector = { computeValues( data, weights )._2 - } - + } + } -/** - * Train a linear regression model with no regularization using Stochastic Gradient Descent. - * This solves the least squares regression formulation - * f(weights) = 1/n ||A weights-y||^2 - * (which is the mean squared error). - * Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with - * its corresponding right hand side label y. - * See also the documentation for the precise formulation. 
- */ class ParallelANNWithSGD private ( private var stepSize: Double, private var numIterations: Int, @@ -130,9 +124,11 @@ class ParallelANNWithSGD private ( private var noInput: Int, private var noHidden: Int, private var noOutput: Int, - private val beta: Double ) + private val beta: Double ) extends GeneralizedSteepestDescendAlgorithm[ParallelANNModel] with Serializable { - + + private val rand = new XORShiftRandom + private val gradient = new LeastSquaresGradientANN( noInput, noHidden, noOutput, beta ) private val updater = new ANNUpdater() override val optimizer = new GradientDescent(gradient, updater) @@ -140,72 +136,62 @@ class ParallelANNWithSGD private ( .setNumIterations(numIterations) .setMiniBatchFraction(miniBatchFraction) - /** - * Construct a LinearRegression object with default parameters: {stepSize: 1.0, - * numIterations: 100, miniBatchFraction: 1.0}. - */ def this() = { this( 1.0, 100, 1.0, 1, 5, 1, 1.0 ) } - + def this( noHidden: Int ) = { this( 1.0, 100, 1.0, 1, noHidden, 1, 1.0 ) } - + def this( noInput: Int, noHidden: Int ) = { this( 1.0, 100, 1.0, noInput, noHidden, 1, 1.0 ) } - + def this( noInput: Int, noHidden: Int, noOutput: Int ) = { this( 1.0, 100, 1.0, noInput, noHidden, noOutput, 1.0 ) } override protected def createModel(weights: Vector) = { - new ParallelANNModel( weights, noInput, noHidden, noOutput, beta ) + new ParallelANNModel( weights, noInput, noHidden, noOutput, beta ) } - + def checkOutput( rdd: RDD[(Vector,Vector)] ) { val oVals = rdd.flatMap( T => T._2.toArray ) - var omax = oVals.max + var omax = oVals.max assert( omax <= 1 ) var omin = oVals.min - assert( omin >= 0 ) + assert( omin >= 0 ) + } + + def randomDouble( i: Int ): Double = { + rand.nextDouble() } - - def randomDouble( i: Int ): Double = { - (((i+5)*59049+(i+5)*78125)%65536).toDouble/65536 - } def train( rdd: RDD[(Vector,Vector)] ): ParallelANNModel = { - + val ft = rdd.first() - + assert( noInput == ft._1.size ) assert( noOutput == ft._2.size ) - - checkOutput( rdd ) - - val noWeights = (noInput+1)*noHidden + (noHidden+1)*noOutput + + checkOutput( rdd ) + + val noWeights = (noInput + 1)*noHidden + (noHidden + 1)*noOutput val initialWeightsArr = new Array[Double](noWeights) - - for( i <- 0 to (noInput+1)*noHidden-1 ) - initialWeightsArr( i ) = (randomDouble(i)*4.8-2.4)/(noInput+1) - for( i <- 0 to (noHidden+1)*noOutput-1 ) - initialWeightsArr( (noInput+1)*noHidden+i ) = (randomDouble(i)*4.8-2.4)/(noHidden+1) - + + for( i <- 0 to (noInput + 1)*noHidden - 1 ) + initialWeightsArr( i ) = (randomDouble(i)*4.8 - 2.4)/(noInput + 1) + for( i <- 0 to (noHidden + 1)*noOutput - 1 ) + initialWeightsArr( (noInput + 1)*noHidden + i ) = (randomDouble(i)*4.8 - 2.4)/(noHidden + 1) + val initialWeights = Vectors.dense( initialWeightsArr ) - - println( "Parameters:" ) - println( " noInput: "+noInput ) - println( " noHidden: "+noHidden ) - println( " noOutput: "+noOutput ) - println( " noWeights: "+noWeights ) - + run( rdd, initialWeights ) } - + def train( rdd: RDD[(Vector,Vector)], model: ParallelANNModel ): ParallelANNModel = { run( rdd, model.weights ) } @@ -214,114 +200,127 @@ class ParallelANNWithSGD private ( /** * data consists of input vector and output vector, and has the following form: - * - * [ ---input--- ---output---] - * - * where input = data( 0 to noInput-1 ) and output = data( noInput to noInput+noOutput-1 ) - * + * + * [ ---input--- ---output--- ] + * + * where input = data( 0 to noInput-1 ) and output = data( noInput to noInput+noOutput-1 ) + * * V_ij is the weight from input node i 
to hidden node j * W_jk is the weight from hidden node j to output node k - * + * * The weights have the following mapping: - * + * * V_ij goes to position i + j*(noInput+1) * W_jk goes to position (noInput+1)*noHidden + j + k*(noHidden+1) - * + * * Gradient has same mapping, i.e. * dE/dVij goes to i + j*(noInput+1) * dE/dWjk goes to (noInput+1)*noHidden + j +k*(noHidden+1) - * - * Where E = ((estOutput-output),(estOutput-output)), + * + * Where E = ((estOutput-output),(estOutput-output)), * the inner product of the difference between estimation and target output with itself. */ -class LeastSquaresGradientANN( noInp: Integer, noHid: Integer, noOut: Integer, b: Double ) extends Gradient with ANN { - +class LeastSquaresGradientANN( + noInp: Integer, + noHid: Integer, + noOut: Integer, + b: Double ) + extends Gradient with ANN { + val noInput = noInp val noHidden = noHid val noOutput = noOut - val beta = b - + val beta = b + override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { - - val brzData = data.toBreeze - val brzInp = DenseVector.vertcat( brzData( 0 to noInput-1 ).toDenseVector, DenseVector[Double](-1.0) ) - - val brzOut = brzData( noInput.toInt to noInput+noOutput-1 ).toVector - val brzWeights = weights.toBreeze - val gradient = DenseVector.zeros[Double]( (noInput+1)*noHidden+(noHidden+1)*noOutput ) - - + + val brzData = data.toBreeze + val brzInp = DenseVector.vertcat( brzData( 0 to noInput - 1 ).toDenseVector, + DenseVector[Double](-1.0) ) + + val brzOut = brzData( noInput.toInt to noInput + noOutput - 1 ).toVector + val brzWeights = weights.toBreeze + val gradient = DenseVector.zeros[Double]( (noInput + 1)*noHidden + (noHidden + 1)*noOutput ) + + val (hidden, output) = computeValues( data, weights ) var brzHidden = hidden.toBreeze /* already includes the robonode */ val brzEst = output.toBreeze val diff = brzEst :- brzOut - val E = diff.dot(diff) - - /* + val E = diff.dot(diff) + + /* * The following three fields are for verification only val eps = .000001 val noInpCheck = 0 val noOutCheck = 0 */ - + var brzWeights_tmp = weights.toBreeze - + /* Wjk */ - for( j <-0 to noHidden ) { - - for( k <-0 to noOutput-1 ) { - - val brzW = brzWeights( noHidden*(noInput+1)+k*(noHidden+1) to noHidden*(noInput+1)+(k+1)*(noHidden+1)-1 ).toVector + for( j <- 0 to noHidden ) { + + for( k <- 0 to noOutput - 1 ) { + + val brzW = brzWeights( noHidden*(noInput + 1) + k*(noHidden + 1) to + noHidden*(noInput + 1) + (k + 1)*(noHidden + 1) - 1 ).toVector var sum_l = brzHidden.dot( brzW ) - gradient( noHidden*(noInput+1)+k*(noHidden+1)+j ) = 2*(diff(k))*dg(sum_l)*brzHidden(j) + gradient( noHidden*(noInput + 1) + k*(noHidden + 1) + j ) + = 2*(diff(k))*dg(sum_l)*brzHidden(j) - /* - * The following is for verification only + /* + * The following is for verification only if( noInput==noInpCheck && noOutput==noOutCheck ) { - brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) = brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) + eps - val est2 = computeValues( data, Vectors.fromBreeze( brzWeights_tmp ) )._2.toBreeze - val diff2 = est2 - brzOut - val d = ( diff2.dot(diff2) - E ) / eps - println( "Calc/Est Wjk: "+ ( gradient( noHidden*(noInput+1)+k*(noHidden+1)+j), d ) ) - brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) = brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) - eps - } + brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) + = brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) + eps + val est2 = computeValues( data, Vectors.fromBreeze( 
brzWeights_tmp ) )._2.toBreeze + val diff2 = est2 - brzOut + val d = ( diff2.dot(diff2) - E ) / eps + println( "Calc/Est Wjk: "+ ( gradient( noHidden*(noInput+1)+k*(noHidden+1)+j), d ) ) + brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) + = brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) - eps + } */ } - + } - - /* Vij */ + + /* Vij */ for( i <- 0 to noInput ) { - - for( j <- 0 to noHidden-1 ) { /* the hidden robonode has no associated Vij */ - - for( k<- 0 to noOutput-1 ) { - - val brzW = brzWeights( noHidden*(noInput+1) to noHidden*(noInput+1)+(noHidden+1)-1 ).toVector + + for( j <- 0 to noHidden - 1 ) { /* the hidden robonode has no associated Vij */ + + for( k<- 0 to noOutput - 1 ) { + + val brzW = brzWeights( noHidden*(noInput + 1) to + noHidden*(noInput + 1) + (noHidden + 1) - 1 ).toVector val sum_n1 = brzHidden.dot( brzW ) - val brzV = brzWeights( j*(noInput+1) to j*(noInput+1)+(noInput+1)-1 ).toVector + val brzV = brzWeights( j*(noInput + 1) to j*(noInput + 1) + (noInput + 1) - 1 ).toVector val sum_n2 = brzV.dot( brzInp ) - gradient( i+j*(noInput+1) ) = - gradient( i+j*(noInput+1) ) + 2*(diff(k))*dg( sum_n1 )*brzWeights( noHidden*(noInput+1)+k*(noHidden+1)+j )*dg( sum_n2 )*brzInp( i ) + gradient( i + j*(noInput + 1) ) = + gradient( i + j*(noInput + 1) ) + + 2*(diff(k))*dg( sum_n1 )*brzWeights( noHidden*(noInput + 1) + + k*(noHidden + 1) + j )*dg( sum_n2 )*brzInp( i ) } - - /* - * The following is for verification only + + /* + * The following is for verification only if( noInput==noInpCheck && noOutput==noOutCheck ) { brzWeights_tmp( i+j*(noInput+1) ) = brzWeights_tmp( i+j*(noInput+1) ) + eps val est2 = computeValues( data, Vectors.fromBreeze( brzWeights_tmp ) )._2.toBreeze val diff2 = est2 - brzOut val d = ( diff2.dot( diff2 ) - E ) / eps - println( "Calc/Est Vij: "+ ( gradient( i+j*(noInput+1) ), d ) ) + println( "Calc/Est Vij: "+ ( gradient( i+j*(noInput+1) ), d ) ) brzWeights_tmp( i+j*(noInput+1) ) = brzWeights_tmp( i+j*(noInput+1) ) - eps } */ } - } + } (Vectors.fromBreeze(gradient), E) } @@ -331,38 +330,38 @@ class LeastSquaresGradientANN( noInp: Integer, noHid: Integer, noOut: Integer, b label: Double, weights: Vector, cumGradient: Vector): Double = { - + val (grad, err) = compute( data, label, weights ) - + cumGradient.toBreeze += grad.toBreeze return err - + } } class ANNUpdater extends Updater { - + override def compute( weightsOld: Vector, gradient: Vector, stepSize: Double, iter: Int, regParam: Double): (Vector, Double) = { - - val thisIterStepSize = stepSize - + + val thisIterStepSize = stepSize + val brzWeights: BV[Double] = weightsOld.toBreeze.toDenseVector - + brzAxpy(-thisIterStepSize, gradient.toBreeze, brzWeights) - + (Vectors.fromBreeze(brzWeights), 0) } - + } class ParallelANN ( - + private var stepSize: Double, private var numIterations: Int, private var miniBatchFraction: Double, @@ -370,7 +369,7 @@ class ParallelANN ( private var noHidden: Int, private var noOutput: Int, private val beta: Double - + ) extends GeneralizedSteepestDescendAlgorithm[ParallelANNModel] with Serializable { private val gradient = new LeastSquaresGradientANN( noInput, noHidden, noOutput, beta ) @@ -383,9 +382,9 @@ class ParallelANN ( def this() = { this( 0.001, 100, 1.0, 1, 5, 1, 1.0 ) } - + override protected def createModel(weights: Vector) = { new ParallelANNModel(weights, noInput, noHidden, noOutput, beta) - } - + } + } From 011c10bd3aa4379110a3dacd7824f22429b84f4b Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Wed, 30 Jul 2014 17:01:38 +0800 Subject: 
[PATCH 010/143] Update GeneralizedSteepestDescendAlgorithm --- .../spark/mllib/ann/GeneralizedSteepestDescendAlgorithm | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm b/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm index 7c436d1067a12..77b73ec5de78d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm @@ -29,12 +29,10 @@ import breeze.linalg.{SparseVector => BSV} /** * :: DeveloperApi :: - * GeneralizedLinearModel (GLM) represents a model trained using - * GeneralizedLinearAlgorithm. GLMs consist of a weight vector and - * an intercept. + * GeneralizedSteepestDescendModel represents a model trained using + * GeneralizedSteepestDescendAlgorithm. * * @param weights Weights computed for every feature. - * @param intercept Intercept computed for this model. */ @DeveloperApi abstract class GeneralizedSteepestDescendModel(val weights: Vector ) @@ -110,7 +108,7 @@ abstract class GeneralizedSteepestDescendModel(val weights: Vector ) * Predict values for a single data point using the model trained. * * @param testData array representing a single data point - * @return Double prediction from the trained model + * @return Vector prediction from the trained model * * Returns the complete vector. */ From e7e29aa2b083d4b1e6969cbe0e02befe6e585c7c Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Wed, 30 Jul 2014 17:03:27 +0800 Subject: [PATCH 011/143] Update TestParallelANN.scala Cleaned up the source layout. --- .../spark/mllib/ann/TestParallelANN.scala | 246 +++++++++--------- 1 file changed, 125 insertions(+), 121 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala index f821f87ebd937..77a52a2d9e2fe 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala @@ -25,207 +25,211 @@ import org.apache.spark.mllib.ann._ import scala.util.Random object TestParallelANN { - - var rand = new Random - + + var rand = new Random + def generateInput2D( f: Double => Double, xmin: Double, xmax: Double, noPoints: Int ): Array[(Vector,Vector)] = { - + var out = new Array[(Vector,Vector)](noPoints) - - for( i <- 0 to noPoints-1 ) { - val x = xmin+rand.nextDouble()*(xmax-xmin) - val y = f(x) - out(i) = ( Vectors.dense( x ), Vectors.dense( y ) ) - } - + + for( i <- 0 to noPoints - 1 ) { + val x = xmin + rand.nextDouble()*(xmax - xmin) + val y = f(x) + out(i) = ( Vectors.dense( x ), Vectors.dense( y ) ) + } + return out - + } - + def generateInput3D( f: (Double,Double) => Double, xmin: Double, xmax: Double, ymin: Double, ymax: Double, noPoints: Int ): Array[(Vector,Vector)] = { - + var out = new Array[(Vector,Vector)](noPoints) - - for( i <- 0 to noPoints-1 ) { - val x = xmin+rand.nextDouble()*(xmax-xmin) - val y = ymin+rand.nextDouble()*(ymax-ymin) + + for( i <- 0 to noPoints - 1 ) { + + val x = xmin + rand.nextDouble()*(xmax - xmin) + val y = ymin + rand.nextDouble()*(ymax - ymin) val z = f( x, y ) + var arr = new Array[Double](2) + arr(0) = x arr(1) = y out(i) = ( Vectors.dense( arr ), Vectors.dense( z ) ) - } - + + } + out - - } + + } def generateInput4D( f: Double => (Double,Double,Double), tmin: Double, tmax: Double, 
noPoints: Int ): Array[(Vector,Vector)] = { - + var out = new Array[(Vector,Vector)](noPoints) - - for( i <- 0 to noPoints-1 ) { - - val t: Double = tmin+rand.nextDouble()*(tmax-tmin) - + + for( i <- 0 to noPoints - 1 ) { + + val t: Double = tmin + rand.nextDouble()*(tmax - tmin) var arr = new Array[Double](3) - var F = f(t) - + arr(0) = F._1 arr(1) = F._2 arr(2) = F._3 - + out(i) = ( Vectors.dense( t ), Vectors.dense( arr ) ) - } - + } + out - + } - + def f( T: Double ): Double = { - val y = 0.5+Math.abs(T/5).toInt.toDouble*.15+math.sin(T*math.Pi/10)*.1 + val y = 0.5 + Math.abs(T/5).toInt.toDouble*.15 + math.sin(T*math.Pi/10)*.1 assert( y<= 1) y } - - def f3D( x: Double, y: Double ): Double = { - .5+.24*Math.sin( x*2*math.Pi/10 ) + .24*Math.cos( y*2*math.Pi/10 ) + + def f3D( x: Double, y: Double ): Double = { + .5 + .24*Math.sin( x*2*math.Pi/10 ) + .24*Math.cos( y*2*math.Pi/10 ) } - - def f4D( t: Double ): (Double, Double,Double) = { - val x = Math.abs(.8*Math.cos( t*2*math.Pi/20 ) )+.1 - val y = (11+t)/22 - val z = .5+.35*Math.sin(t*2*math.Pi/5)*Math.cos( t*2*math.Pi/10 ) + .15*t/11 + + def f4D( t: Double ): (Double, Double,Double) = { + val x = Math.abs(.8*Math.cos( t*2*math.Pi/20 ) ) + .1 + val y = (11 + t)/22 + val z = .5 + .35*Math.sin(t*2*math.Pi/5)*Math.cos( t*2*math.Pi/10 ) + .15*t/11 ( x, y, z ) } - + def concat( v1: Vector, v2: Vector ): Vector = { - + var a1 = v1.toArray var a2 = v2.toArray var a3 = new Array[Double]( a1.size + a2.size ) - - for( i <- 0 to a1.size-1 ) { + + for( i <- 0 to a1.size - 1 ) { a3(i) = a1(i) } - - for( i<-0 to a2.size-1 ) { - a3(i+a1.size) = a2(i) + + for( i <- 0 to a2.size - 1 ) { + a3(i + a1.size) = a2(i) } - + Vectors.dense( a3 ) - - } + + } def main( arg: Array[String] ) { - + println( "Parallel ANN tester" ) - + var curAngle: Double = 0.0 var graphic: Boolean = false - + if( (arg.length>0) && (arg(0)=="graph" ) ) { graphic = true } - + var outputFrame2D: OutputFrame2D = null var outputFrame3D: OutputFrame3D = null var outputFrame4D: OutputFrame3D = null - + if( graphic ) { - + outputFrame2D = new OutputFrame2D( "x -> y" ) outputFrame2D.apply - + outputFrame3D = new OutputFrame3D( "(x,y) -> z", 1 ) - outputFrame3D.apply - + outputFrame3D.apply + outputFrame4D = new OutputFrame3D( "t -> (x,y,z)" ) outputFrame4D.apply - + } - + var A = 20.0 var B = 50.0 - + var conf = new SparkConf().setAppName("Parallel ANN").setMaster("local[5]") var sc = new SparkContext(conf) - - val testRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache() - val testRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache + + val testRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache() + val testRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache val testRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache - + if( graphic ) { + outputFrame2D.setData( testRDD2D.map( T => concat( T._1, T._2 ) ) ) - outputFrame3D.setData( testRDD3D.map( T => concat( T._1, T._2 ) ) ) + outputFrame3D.setData( testRDD3D.map( T => concat( T._1, T._2 ) ) ) outputFrame4D.setData( testRDD4D.map( T => T._2 ) ) + } val parallelANN2D = new ParallelANNWithSGD( 1, 10 ) - parallelANN2D.optimizer.setNumIterations(1000).setStepSize( 1.0 ) - + parallelANN2D.optimizer.setNumIterations(1000).setStepSize( 1.0 ) + val parallelANN3D = new ParallelANNWithSGD( 2, 20 ) - parallelANN3D.optimizer.setNumIterations(1000).setStepSize( 1.0 ) - + 
parallelANN3D.optimizer.setNumIterations(1000).setStepSize( 1.0 ) + val parallelANN4D = new ParallelANNWithSGD( 1, 20, 3 ) - parallelANN4D.optimizer.setNumIterations( 1000 ).setStepSize( 1.0 ) - + parallelANN4D.optimizer.setNumIterations( 1000 ).setStepSize( 1.0 ) + var model2D = parallelANN2D.train( testRDD2D ) - var model3D = parallelANN3D.train( testRDD3D ) + var model3D = parallelANN3D.train( testRDD3D ) var model4D = parallelANN4D.train( testRDD4D ) - - val noIt = 100 + + val noIt = 200 var errHist = new Array[(Int,Double,Double,Double)]( noIt ) - + val validationRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache() val validationRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache val validationRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache - for( i <- 0 to noIt-1 ) { - + for( i <- 0 to noIt - 1 ) { + val predictedAndTarget2D = validationRDD2D.map( T => ( T._1, T._2, model2D.predictV( T._1 ) ) ) - val predictedAndTarget3D = validationRDD3D.map( T => ( T._1, T._2, model3D.predictV( T._1 ) ) ) + val predictedAndTarget3D = validationRDD3D.map( T => ( T._1, T._2, model3D.predictV( T._1 ) ) ) val predictedAndTarget4D = validationRDD4D.map( T => ( T._1, T._2, model4D.predictV( T._1 ) ) ) - - var err2D = predictedAndTarget2D.map( T => - (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) - ).reduce( (u,v) => u+v ) - - var err3D = predictedAndTarget3D.map( T => - (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) - ).reduce( (u,v) => u+v ) + + var err2D = predictedAndTarget2D.map( T => + (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) + ).reduce( (u,v) => u + v ) + + var err3D = predictedAndTarget3D.map( T => + (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) + ).reduce( (u,v) => u + v ) var err4D = predictedAndTarget4D.map( T => { - + val v1 = T._2.toArray val v2 = T._3.toArray - - (v1(0) - v2(0))*(v1(0) - v2(0))+ - (v1(1) - v2(1))*(v1(1) - v2(1))+ + + (v1(0) - v2(0))*(v1(0) - v2(0)) + + (v1(1) - v2(1))*(v1(1) - v2(1)) + (v1(2) - v2(2))*(v1(2) - v2(2)) - - } ).reduce( (u,v) => u+v ) - - + + } ).reduce( (u,v) => u + v ) + + if( graphic ) { - val predicted2D = predictedAndTarget2D.map( - T => concat( T._1, T._3 ) + val predicted2D = predictedAndTarget2D.map( + T => concat( T._1, T._3 ) ) - + val predicted3D = predictedAndTarget3D.map( T => concat( T._1, T._3 ) - ) - + ) + val predicted4D = predictedAndTarget4D.map( - T => T._3 + T => T._3 ) - - curAngle = curAngle + math.Pi/4 + + curAngle = curAngle + math.Pi/4 if( curAngle>=2*math.Pi ) { - curAngle = curAngle-2*math.Pi + curAngle = curAngle - 2*math.Pi } outputFrame3D.setAngle( curAngle ) @@ -234,26 +238,26 @@ object TestParallelANN { outputFrame2D.setApproxPoints( predicted2D ) outputFrame3D.setApproxPoints( predicted3D ) outputFrame4D.setApproxPoints( predicted4D ) - + } - - println( "Error 2D/3D/4D: "+(err2D, err3D, err4D) ) + + println( "Error 2D/3D/4D: " + (err2D, err3D, err4D) ) errHist(i) = ( i, err2D, err3D, err4D ) - - if( i Date: Wed, 30 Jul 2014 17:04:03 +0800 Subject: [PATCH 012/143] Update TestParallelANNgraphics.scala --- .../mllib/ann/TestParallelANNgraphics.scala | 294 ++++++++++-------- 1 file changed, 159 insertions(+), 135 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala index 4a7a9a712b549..da9386b1c1fe9 
100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala @@ -24,102 +24,103 @@ import org.apache.spark.mllib.linalg.Vector import scala.Array.canBuildFrom object windowAdapter extends WindowAdapter { - + override def windowClosing( e: WindowEvent ) { System.exit(0) } - + } class OutputCanvas2D( wd: Int, ht: Int ) extends Canvas { - + var points: Array[Vector] = null var approxPoints: Array[Vector] = null - + /* input: rdd of (x,y) vectors */ def setData( rdd: RDD[Vector] ) { - points = rdd.toArray + points = rdd.toArray repaint } - - def setApproxPoints( rdd: RDD[Vector] ) { + + def setApproxPoints( rdd: RDD[Vector] ) { approxPoints = rdd.toArray repaint } - + def plotDot( g: Graphics, x: Int, y: Int ) { - val r = 5 + val r = 5 val noSamp = 6*r var x1 = x - var y1 = y+r - for( j<-1 to noSamp ) { - val x2 = (x.toDouble+math.sin( j.toDouble*2*math.Pi/noSamp )*r+.5).toInt - val y2 = (y.toDouble+math.cos( j.toDouble*2*math.Pi/noSamp )*r+.5).toInt - g.drawLine( x1, ht-y1, x2, ht-y2 ) + var y1 = y + r + for( j <- 1 to noSamp ) { + val x2 = (x.toDouble + math.sin( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt + val y2 = (y.toDouble + math.cos( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt + g.drawLine( x1, ht - y1, x2, ht - y2 ) x1 = x2 y1 = y2 } } - + override def paint( g: Graphics) = { - - var xmax: Double = 0.0 - var xmin: Double = 0.0 - var ymax: Double = 0.0 - var ymin: Double = 0.0 - - if( points!=null ) { - - g.setColor( Color.black ) - val x = points.map( T => (T.toArray)(0) ) - val y = points.map( T => (T.toArray)(1) ) - - xmax = x.max - xmin = x.min - ymax = y.max - ymin = y.min - - for( i <- 0 to x.size-1 ) { - - val xr = (((x(i).toDouble-xmin)/(xmax-xmin))*wd+.5).toInt - val yr = (((y(i).toDouble-ymin)/(ymax-ymin))*ht+.5).toInt - plotDot( g, xr, yr ) - - } - + + var xmax: Double = 0.0 + var xmin: Double = 0.0 + var ymax: Double = 0.0 + var ymin: Double = 0.0 + + if( points!=null ) { + + g.setColor( Color.black ) + val x = points.map( T => (T.toArray)(0) ) + val y = points.map( T => (T.toArray)(1) ) + + xmax = x.max + xmin = x.min + ymax = y.max + ymin = y.min + + for( i <- 0 to x.size - 1 ) { + + val xr = (((x(i).toDouble - xmin)/(xmax - xmin))*wd + .5).toInt + val yr = (((y(i).toDouble - ymin)/(ymax - ymin))*ht + .5).toInt + plotDot( g, xr, yr ) + + } + if( approxPoints != null ) { - + g.setColor( Color.red ) val x = approxPoints.map( T => (T.toArray)(0) ) val y = approxPoints.map( T => (T.toArray)(1) ) - + for( i <- 0 to x.size-1 ) { - val xr = (((x(i).toDouble-xmin)/(xmax-xmin))*wd+.5).toInt - val yr = (((y(i).toDouble-ymin)/(ymax-ymin))*ht+.5).toInt + val xr = (((x(i).toDouble - xmin)/(xmax - xmin))*wd + .5).toInt + val yr = (((y(i).toDouble - ymin)/(ymax - ymin))*ht + .5).toInt plotDot( g, xr, yr ) } - + } } - + } + } class OutputFrame2D( title: String ) extends Frame( title ) { - + val wd = 800 val ht = 600 - + var outputCanvas = new OutputCanvas2D( wd, ht ) - + def apply() { addWindowListener( windowAdapter ) setSize( wd, ht ) add( "Center", outputCanvas ) show() } - + def setData( rdd: RDD[Vector] ) { outputCanvas.setData( rdd ) } @@ -128,177 +129,200 @@ class OutputFrame2D( title: String ) extends Frame( title ) { outputCanvas.setApproxPoints( rdd ) } - + } object windowAdapter3D extends WindowAdapter { - + override def windowClosing( e: WindowEvent ) { System.exit(0) } - + } class OutputCanvas3D( wd: Int, ht: Int, shadowFrac: Double ) extends Canvas { - + var 
angle: Double = 0 var points: Array[Vector] = null var approxPoints: Array[Vector] = null /* 3 dimensional (x,y,z) vector */ def setData( rdd: RDD[Vector] ) { - points = rdd.toArray + points = rdd.toArray repaint } - - def setApproxPoints( rdd: RDD[Vector] ) { + + def setApproxPoints( rdd: RDD[Vector] ) { approxPoints = rdd.toArray repaint } - + def plotDot( g: Graphics, x: Int, y: Int ) { - val r = 5 + val r = 5 val noSamp = 6*r var x1 = x - var y1 = y+r - for( j<-1 to noSamp ) { - val x2 = (x.toDouble+math.sin( j.toDouble*2*math.Pi/noSamp )*r+.5).toInt - val y2 = (y.toDouble+math.cos( j.toDouble*2*math.Pi/noSamp )*r+.5).toInt - g.drawLine( x1, ht-y1, x2, ht-y2 ) + var y1 = y + r + for( j <- 1 to noSamp ) { + val x2 = (x.toDouble + math.sin( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt + val y2 = (y.toDouble + math.cos( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt + g.drawLine( x1, ht - y1, x2, ht - y2 ) x1 = x2 y1 = y2 } } - + def plotLine( g: Graphics, x1: Int, y1: Int, x2: Int, y2: Int ) { - g.drawLine( x1, ht-y1, x2, ht-y2 ) + g.drawLine( x1, ht - y1, x2, ht - y2 ) } - + def calcCord( arr: Array[Double], angle: Double ): (Double, Double, Double, Double, Double, Double) = { var arrOut = new Array[Double](6) - + val x = arr(0)*math.cos( angle ) - arr(1)*math.sin( angle ) val y = arr(0)*math.sin( angle ) + arr(1)*math.cos( angle ) val z = arr(2) - + val x0 = arr(0)*math.cos( angle ) - arr(1)*math.sin( angle ) val y0 = arr(0)*math.sin( angle ) + arr(1)*math.cos( angle ) val z0 = 0 - - val xs = (arr(0)+shadowFrac*arr(2))*math.cos( angle ) - arr(1)*math.sin( angle ) - val ys = (arr(0)+shadowFrac*arr(2))*math.sin( angle ) + arr(1)*math.cos( angle ) + + val xs = (arr(0) + shadowFrac*arr(2))*math.cos( angle ) - arr(1)*math.sin( angle ) + val ys = (arr(0) + shadowFrac*arr(2))*math.sin( angle ) + arr(1)*math.cos( angle ) val zs = 0 - - arrOut(0) = y-.5*x - arrOut(1) = z-.25*x - - arrOut(2) = y0-.5*x0 - arrOut(3) = z0-.25*x0 - - arrOut(4) = ys-.5*xs - arrOut(5) = zs-.25*xs + + arrOut(0) = y - .5*x + arrOut(1) = z - .25*x + + arrOut(2) = y0 - .5*x0 + arrOut(3) = z0 - .25*x0 + + arrOut(4) = ys - .5*xs + arrOut(5) = zs - .25*xs ( arrOut(0), arrOut(1), arrOut(2), arrOut(3), arrOut(4), arrOut(5) ) - + } - - override def paint( g: Graphics) = { - + + override def paint( g: Graphics) = { + if( points!=null ) { - + var p = points.map( T => calcCord( T.toArray, angle ) ).toArray - + var xmax = p(0)._1 var xmin = p(0)._1 var ymax = p(0)._2 var ymin = p(0)._2 - - for( i <-0 to p.size-1 ) { - if( xmaxp(i)._1 ) xmin = p(i)._1 - if( xmin>p(i)._3 ) xmin = p(i)._3 - if( xmin>p(i)._5 ) xmin = p(i)._5 + if( xmaxp(i)._1 ) { + xmin = p(i)._1 + } + if( xmin>p(i)._3 ) { + xmin = p(i)._3 + } + if( xmin>p(i)._5 ) { + xmin = p(i)._5 + } + + if( ymaxp(i)._2 ) { + ymin = p(i)._2 + } + if( ymin>p(i)._4 ) { + ymin = p(i)._4 + } + if( ymin>p(i)._6 ) { + ymin = p(i)._6 + } - if( ymin>p(i)._2 ) ymin = p(i)._2 - if( ymin>p(i)._4 ) ymin = p(i)._4 - if( ymin>p(i)._6 ) ymin = p(i)._6 - } - + for( i <- 0 to p.size-1 ) { - var x_ = (((p(i)._1-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt - var y_ = (((p(i)._2-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt - var x0 = (((p(i)._3-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt - var y0 = (((p(i)._4-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt - var xs = (((p(i)._5-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt - var ys = (((p(i)._6-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt - + var x_ = (((p(i)._1 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt + var y_ = (((p(i)._2 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt + var x0 = 
(((p(i)._3 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt + var y0 = (((p(i)._4 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt + var xs = (((p(i)._5 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt + var ys = (((p(i)._6 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt + g.setColor( Color.black ) - plotDot( g, x_, y_ ) plotLine( g, x_, y_, x0, y0 ) g.setColor( Color.gray ) plotLine( g, x0, y0, xs, ys ) - - } - + + } + if( approxPoints != null ) { - + var p = approxPoints.map( T => calcCord( T.toArray, angle ) ) - + for( i <- 0 to p.size-1 ) { - - var x_ = (((p(i)._1-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt - var y_ = (((p(i)._2-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt - var x0 = (((p(i)._3-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt - var y0 = (((p(i)._4-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt - var xs = (((p(i)._5-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt - var ys = (((p(i)._6-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt - + + var x_ = (((p(i)._1 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt + var y_ = (((p(i)._2 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt + var x0 = (((p(i)._3 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt + var y0 = (((p(i)._4 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt + var xs = (((p(i)._5 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt + var ys = (((p(i)._6 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt + g.setColor( Color.red ) plotDot( g, x_, y_ ) plotLine( g, x_, y_, x0, y0 ) g.setColor( Color.magenta ) plotLine( g, x0, y0, xs, ys ) - - } - - } + + } + + } } } } class OutputFrame3D( title: String, shadowFrac: Double ) extends Frame( title ) { - + val wd = 800 val ht = 600 - + def this( title: String ) = this( title, .25 ) - + var outputCanvas = new OutputCanvas3D( wd, ht, shadowFrac ) - + def apply() { addWindowListener( windowAdapter3D ) setSize( wd, ht ) add( "Center", outputCanvas ) show() } - + def setData( rdd: RDD[Vector] ) { outputCanvas.setData( rdd ) } - + def setAngle( angle: Double ) { outputCanvas.angle = angle } @@ -306,5 +330,5 @@ class OutputFrame3D( title: String, shadowFrac: Double ) extends Frame( title ) def setApproxPoints( rdd: RDD[Vector] ) { outputCanvas.setApproxPoints( rdd ) } - + } From c9fc3f44ef67f0b04b55cabca27d1716799889e0 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Fri, 1 Aug 2014 13:44:33 +0800 Subject: [PATCH 013/143] Rename GeneralizedSteepestDescendAlgorithm to GeneralizedSteepestDescendAlgorithm.scala --- ...DescendAlgorithm => GeneralizedSteepestDescendAlgorithm.scala} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename mllib/src/main/scala/org/apache/spark/mllib/ann/{GeneralizedSteepestDescendAlgorithm => GeneralizedSteepestDescendAlgorithm.scala} (100%) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm b/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm.scala similarity index 100% rename from mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm rename to mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm.scala From 78f99dc02d925b700c22c717dd7ac263618cce7c Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Fri, 1 Aug 2014 16:04:35 +0800 Subject: [PATCH 014/143] Update TestParallelANNgraphics.scala Replaced deprecated "toArray" by "collect". 
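
('RDD.collect' returns the contents of the RDD as a local array, which is what the deprecated 'RDD.toArray' did, so this is a drop-in replacement.)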
--- .../apache/spark/mllib/ann/TestParallelANNgraphics.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala index da9386b1c1fe9..682bb7cd03ab8 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala @@ -38,12 +38,12 @@ class OutputCanvas2D( wd: Int, ht: Int ) extends Canvas { /* input: rdd of (x,y) vectors */ def setData( rdd: RDD[Vector] ) { - points = rdd.toArray + points = rdd.collect repaint } def setApproxPoints( rdd: RDD[Vector] ) { - approxPoints = rdd.toArray + approxPoints = rdd.collect repaint } @@ -148,12 +148,12 @@ class OutputCanvas3D( wd: Int, ht: Int, shadowFrac: Double ) extends Canvas { /* 3 dimensional (x,y,z) vector */ def setData( rdd: RDD[Vector] ) { - points = rdd.toArray + points = rdd.collect repaint } def setApproxPoints( rdd: RDD[Vector] ) { - approxPoints = rdd.toArray + approxPoints = rdd.collect repaint } From 43103f0e361f6689914938ca7d4b5748288575cd Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 21 Aug 2014 14:34:18 +0800 Subject: [PATCH 015/143] Update and rename GeneralizedSteepestDescendAlgorithm.scala to GeneralizedSteepestDescentAlgorithm.scala Updated naming --- ...m.scala => GeneralizedSteepestDescentAlgorithm.scala} | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) rename mllib/src/main/scala/org/apache/spark/mllib/ann/{GeneralizedSteepestDescendAlgorithm.scala => GeneralizedSteepestDescentAlgorithm.scala} (95%) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescentAlgorithm.scala similarity index 95% rename from mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm.scala rename to mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescentAlgorithm.scala index 77b73ec5de78d..3bc499fa10270 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescentAlgorithm.scala @@ -35,7 +35,8 @@ import breeze.linalg.{SparseVector => BSV} * @param weights Weights computed for every feature. */ @DeveloperApi -abstract class GeneralizedSteepestDescendModel(val weights: Vector ) +abstract class GeneralizedSteepestDescentModel(val weights: Vector ) + extends Serializable { /** @@ -127,7 +128,7 @@ abstract class GeneralizedSteepestDescendModel(val weights: Vector ) * This class should be extended with an Optimizer to create a new GLM. */ @DeveloperApi -abstract class GeneralizedSteepestDescendAlgorithm[M <: GeneralizedSteepestDescendModel] +abstract class GeneralizedSteepestDescentAlgorithm[M <: GeneralizedSteepestDescentModel] extends Logging with Serializable { /** The optimizer to solve the problem. 
*/ @@ -157,8 +158,8 @@ abstract class GeneralizedSteepestDescendAlgorithm[M <: GeneralizedSteepestDesce val data = input.map( v => ( (0.0).toDouble, Vectors.fromBreeze( DenseVector.vertcat( - v._1.toBreeze.toDenseVector, - v._2.toBreeze.toDenseVector ) ) + v._1.toBreeze.toDenseVector, + v._2.toBreeze.toDenseVector ) ) ) ) val weights = optimizer.optimize(data, initialWeights) From 2ecc7d56f5236b5eb988aeaaad709fb251e06a7a Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 21 Aug 2014 14:35:31 +0800 Subject: [PATCH 016/143] Update ParallelANN.scala Removed usage of Breeze vectors and optimised computation in the loops --- .../apache/spark/mllib/ann/ParallelANN.scala | 248 ++++++++++-------- 1 file changed, 138 insertions(+), 110 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala index f43fbb4448fa7..d1a31f2c598a3 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala @@ -51,55 +51,57 @@ trait ANN { def dg( x: Double ) = beta*g(x)*(1 - g(x)) /* returns the hidden layer including the -1 robonode! */ - def computeHidden( data: Vector, weights: Vector ): Vector = { + def computeHidden( data: Array[Double], weights: Array[Double] ): Array[Double] = { - val brzData = data.toBreeze - val brzInp = DenseVector.vertcat( - brzData( 0 to noInput - 1 ).toDenseVector, DenseVector[Double](-1.0) ) - val brzWeights = weights.toBreeze - var hidden = DenseVector.zeros[Double]( noHidden + 1 ) + var arrHidden = new Array[Double]( noHidden + 1 ) for( j <- 0 to noHidden-1 ) { - val weightsSubset = brzWeights( - j*(noInput + 1) to j*(noInput + 1) + (noInput + 1) - 1 ).toVector - hidden( j ) = g( weightsSubset.dot( brzInp ) ) + val start = j*(noInput + 1) + var v: Double = 0; + for( w <- 0 to noInput-1 ) + v = v + data(w)*weights( start + w ) + v = v - 1.0 * weights( start + noInput ) // robonode + arrHidden( j ) = g( v ) } - hidden( noHidden ) = -1.0 + arrHidden( noHidden ) = -1.0 - Vectors.fromBreeze( hidden ) + arrHidden } /* returns the hidden layer including the -1 robonode, as well as the final estimation */ - def computeValues( data: Vector, weights: Vector ): (Vector, Vector) = { + def computeValues( + data: Array[Double], + weights: Array[Double] ): + (Array[Double], Array[Double]) = { var hidden = computeHidden( data, weights ) var output = new Array[Double](noOutput) for( k <- 0 to noOutput - 1 ) { - val brzWeights = weights.toBreeze - var weightsSubset = brzWeights( noHidden*(noInput + 1) + k*(noHidden + 1) to - noHidden*(noInput + 1) + (k + 1)*(noHidden + 1) - 1).toVector - output(k) = g( weightsSubset.dot( hidden.toBreeze ) ) + var tmp: Double = 0.0; + for( i <- 0 to noHidden ) + tmp = tmp + hidden(i)*weights( noHidden * ( noInput + 1 ) + k * ( noHidden + 1 ) + i ) + output(k) = g( tmp ) + } - ( hidden, Vectors.dense( output ) ) + ( hidden, output ) } } -class ParallelANNModel private[mllib] -( +class ParallelANNModel private[mllib] ( override val weights: Vector, val noInp: Integer, val noHid: Integer, val noOut: Integer, val b: Double ) - extends GeneralizedSteepestDescendModel(weights) with RegressionModel with Serializable with ANN { + extends GeneralizedSteepestDescentModel(weights) with RegressionModel with Serializable with ANN { val noInput = noInp val noHidden = noHid @@ -107,17 +109,17 @@ class ParallelANNModel private[mllib] val beta = b override def predictPoint( data: Vector, 
weights: Vector ): Double = { - val outp = computeValues( data, weights )._2 - outp.toArray(0) + val outp = computeValues( data.toArray, weights.toArray )._2 + outp(0) } def predictPointV( data: Vector, weights: Vector): Vector = { - computeValues( data, weights )._2 + Vectors.dense( computeValues( data.toArray, weights.toArray )._2 ) } } -class ParallelANNWithSGD private ( +class ParallelANN private ( private var stepSize: Double, private var numIterations: Int, private var miniBatchFraction: Double, @@ -125,7 +127,7 @@ class ParallelANNWithSGD private ( private var noHidden: Int, private var noOutput: Int, private val beta: Double ) - extends GeneralizedSteepestDescendAlgorithm[ParallelANNModel] with Serializable { + extends GeneralizedSteepestDescentAlgorithm[ParallelANNModel] with Serializable { private val rand = new XORShiftRandom @@ -196,6 +198,16 @@ class ParallelANNWithSGD private ( run( rdd, model.weights ) } + def train( rdd: RDD[(Vector,Vector)], weights: Vector ): ParallelANNModel = { + + val ft = rdd.first() + assert( noInput == ft._1.size ) + assert( noOutput == ft._2.size ) + assert( weights.size == (noInput + 1) * noHidden + (noHidden + 1) * noOutput ) + run( rdd, weights ); + + } + } /** @@ -222,10 +234,10 @@ class ParallelANNWithSGD private ( */ class LeastSquaresGradientANN( - noInp: Integer, - noHid: Integer, - noOut: Integer, - b: Double ) + noInp: Integer, + noHid: Integer, + noOut: Integer, + b: Double ) extends Gradient with ANN { val noInput = noInp @@ -233,55 +245,70 @@ class LeastSquaresGradientANN( val noOutput = noOut val beta = b + /* For verification only + private val rand = new XORShiftRandom + */ + override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { - val brzData = data.toBreeze - val brzInp = DenseVector.vertcat( brzData( 0 to noInput - 1 ).toDenseVector, - DenseVector[Double](-1.0) ) + val arrData = data.toArray + val arrWeights = weights.toArray - val brzOut = brzData( noInput.toInt to noInput + noOutput - 1 ).toVector - val brzWeights = weights.toBreeze - val gradient = DenseVector.zeros[Double]( (noInput + 1)*noHidden + (noHidden + 1)*noOutput ) + var gradient = new Array[Double]( (noInput + 1) * noHidden + (noHidden + 1) * noOutput ) + val (arrHidden, output) = computeValues( arrData, arrWeights ) + val arrEst = output - val (hidden, output) = computeValues( data, weights ) - var brzHidden = hidden.toBreeze /* already includes the robonode */ - val brzEst = output.toBreeze - val diff = brzEst :- brzOut - val E = diff.dot(diff) + var diff = new Array[Double]( noOutput ) + var E: Double = 0.0 + for( i <-0 to noOutput-1 ) { + diff( i ) = arrEst( i ) - arrData( noInput.toInt + i ); + E = E + diff(i) * diff(i) + } /* - * The following three fields are for verification only + * The following fields are for verification only val eps = .000001 - val noInpCheck = 0 - val noOutCheck = 0 + val testOneVOutOf = 5000; + val testOneWOutOf = 2500; + var arrWeights_tmp = weights.toArray + val warnErr = 5e-7 */ - var brzWeights_tmp = weights.toBreeze - /* Wjk */ - for( j <- 0 to noHidden ) { + for( k <- 0 to noOutput - 1 ) { + + var start = noHidden*(noInput + 1) + k*(noHidden + 1) + var sum_l: Double = 0 + for( w <- 0 to noHidden ) + sum_l = sum_l + arrHidden( w ) * arrWeights( w + start ) + val dg_sum_l = dg( sum_l ) - for( k <- 0 to noOutput - 1 ) { - val brzW = brzWeights( noHidden*(noInput + 1) + k*(noHidden + 1) to - noHidden*(noInput + 1) + (k + 1)*(noHidden + 1) - 1 ).toVector - var sum_l = brzHidden.dot( brzW ) + for( j 
<- 0 to noHidden ) { + gradient( noHidden*(noInput + 1) + k*(noHidden + 1) + j ) - = 2*(diff(k))*dg(sum_l)*brzHidden(j) + = 2*(diff(k))*dg_sum_l*arrHidden(j) /* * The following is for verification only - if( noInput==noInpCheck && noOutput==noOutCheck ) - { - brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) - = brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) + eps - val est2 = computeValues( data, Vectors.fromBreeze( brzWeights_tmp ) )._2.toBreeze - val diff2 = est2 - brzOut - val d = ( diff2.dot(diff2) - E ) / eps - println( "Calc/Est Wjk: "+ ( gradient( noHidden*(noInput+1)+k*(noHidden+1)+j), d ) ) - brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) - = brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) - eps + if( rand.nextInt % (testOneWOutOf>>1) == 0 ) { + arrWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) + = arrWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) + eps + val est2 = computeValues( arrData, arrWeights_tmp )._2 + var E2: Double = 0.0; + for( w <- 0 to noOutput-1 ) { + val diff2 = est2(w)-data( noInput+w ) + E2 = E2 + diff2*diff2 + } + val d = ( E2 - E ) / eps + val compErr = math.abs( gradient( noHidden*(noInput+1)+k*(noHidden+1)+j) - d ) + if( compErr > warnErr ) { + println( "!!! Calc/Est Wjk: " + + ( ( gradient( noHidden*(noInput+1)+k*(noHidden+1)+j), d ), compErr ) ) + } + arrWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) + = arrWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) - eps } */ @@ -289,39 +316,69 @@ class LeastSquaresGradientANN( } + var start = noHidden * (noInput + 1) + var sum_n1: Double = 0 + for( w <- 0 to noHidden ) + sum_n1 = sum_n1 + arrHidden( w )*arrWeights( w + start ) + val dg_sum_n1 = dg( sum_n1 ) + + /* Vij */ - for( i <- 0 to noInput ) { + for( j <- 0 to noHidden - 1 ) { /* the hidden robonode has no associated Vij */ + + start = j * ( noInput + 1 ) + var sum_n2: Double = 0 + for( w <- 0 to noInput-1 ) // non-robonodes + sum_n2 = sum_n2 + arrData( w )*arrWeights( w + start) + sum_n2 = sum_n2 - arrWeights( noInput + start) // robonode + val dg_sum_n2 = dg( sum_n2 ) + + for( i <- 0 to noInput ) { - for( j <- 0 to noHidden - 1 ) { /* the hidden robonode has no associated Vij */ for( k<- 0 to noOutput - 1 ) { - val brzW = brzWeights( noHidden*(noInput + 1) to - noHidden*(noInput + 1) + (noHidden + 1) - 1 ).toVector - val sum_n1 = brzHidden.dot( brzW ) - val brzV = brzWeights( j*(noInput + 1) to j*(noInput + 1) + (noInput + 1) - 1 ).toVector - val sum_n2 = brzV.dot( brzInp ) - gradient( i + j*(noInput + 1) ) = - gradient( i + j*(noInput + 1) ) - + 2*(diff(k))*dg( sum_n1 )*brzWeights( noHidden*(noInput + 1) - + k*(noHidden + 1) + j )*dg( sum_n2 )*brzInp( i ) + if( i>1) == 0 ) { + arrWeights_tmp( i+j*(noInput+1) ) = arrWeights_tmp( i+j*(noInput+1) ) + eps + val est2 = computeValues( arrData, arrWeights_tmp )._2 + + var E2: Double = 0.0; + for( w <- 0 to noOutput-1 ) { + val diff2 = est2(w)-data( noInput+w ) + E2 = E2 + diff2*diff2 + } + + val d = ( E2 - E ) / eps + val compErr = math.abs( gradient( i+j*(noInput+1) )-d ) + if( compErr>warnErr ) + println( "!!! 
Calc/Est Vij: "+ ( ( gradient( i+j*(noInput+1) ), d ), compErr ) ) + arrWeights_tmp( i+j*(noInput+1) ) = arrWeights_tmp( i+j*(noInput+1) ) - eps } */ } } - (Vectors.fromBreeze(gradient), E) + + (Vectors.dense(gradient), E) } @@ -359,32 +416,3 @@ class ANNUpdater extends Updater { } } - -class ParallelANN ( - - private var stepSize: Double, - private var numIterations: Int, - private var miniBatchFraction: Double, - private var noInput: Int, - private var noHidden: Int, - private var noOutput: Int, - private val beta: Double - - ) extends GeneralizedSteepestDescendAlgorithm[ParallelANNModel] with Serializable { - - private val gradient = new LeastSquaresGradientANN( noInput, noHidden, noOutput, beta ) - private val updater = new SimpleUpdater() - override val optimizer = new GradientDescent(gradient, updater) - .setStepSize(stepSize) - .setNumIterations(numIterations) - .setMiniBatchFraction(miniBatchFraction) - - def this() = { - this( 0.001, 100, 1.0, 1, 5, 1, 1.0 ) - } - - override protected def createModel(weights: Vector) = { - new ParallelANNModel(weights, noInput, noHidden, noOutput, beta) - } - -} From d80fe634bcedebcb439d78d8254e9a3ab2442188 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 21 Aug 2014 14:36:45 +0800 Subject: [PATCH 017/143] Update TestParallelANN.scala Updated with some performance measurements --- .../spark/mllib/ann/TestParallelANN.scala | 31 +++++++++++++------ 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala index 77a52a2d9e2fe..b37c8a493dd2b 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala @@ -23,12 +23,15 @@ import org.apache.spark.mllib.regression._ import org.apache.spark.mllib.linalg._ import org.apache.spark.mllib.ann._ import scala.util.Random +import java.util.Calendar +import java.text.SimpleDateFormat object TestParallelANN { - var rand = new Random + var rand = new Random( 0 ) - def generateInput2D( f: Double => Double, xmin: Double, xmax: Double, noPoints: Int ): Array[(Vector,Vector)] = { + def generateInput2D( f: Double => Double, xmin: Double, xmax: Double, noPoints: Int ): Array[(Vector,Vector)] = + { var out = new Array[(Vector,Vector)](noPoints) @@ -125,6 +128,9 @@ object TestParallelANN { println( "Parallel ANN tester" ) + val formatter = new SimpleDateFormat("hh:mm:ss") + val starttime = Calendar.getInstance().getTime() + var curAngle: Double = 0.0 var graphic: Boolean = false @@ -156,7 +162,7 @@ object TestParallelANN { var sc = new SparkContext(conf) val testRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache() - val testRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache + val testRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 200 ), 2).cache val testRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache if( graphic ) { @@ -167,23 +173,23 @@ object TestParallelANN { } - val parallelANN2D = new ParallelANNWithSGD( 1, 10 ) + val parallelANN2D = new ParallelANN( 1, 10 ) parallelANN2D.optimizer.setNumIterations(1000).setStepSize( 1.0 ) - val parallelANN3D = new ParallelANNWithSGD( 2, 20 ) + val parallelANN3D = new ParallelANN( 2, 20 ) parallelANN3D.optimizer.setNumIterations(1000).setStepSize( 1.0 ) - val parallelANN4D = new ParallelANNWithSGD( 1, 20, 
3 ) + val parallelANN4D = new ParallelANN( 1, 20, 3 ) parallelANN4D.optimizer.setNumIterations( 1000 ).setStepSize( 1.0 ) var model2D = parallelANN2D.train( testRDD2D ) var model3D = parallelANN3D.train( testRDD3D ) var model4D = parallelANN4D.train( testRDD4D ) - val noIt = 200 + val noIt = 20 var errHist = new Array[(Int,Double,Double,Double)]( noIt ) - val validationRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache() + val validationRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache val validationRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache val validationRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache @@ -241,7 +247,10 @@ object TestParallelANN { } - println( "Error 2D/3D/4D: " + (err2D, err3D, err4D) ) + val now = Calendar.getInstance().getTime() + val times = formatter.format( now ); + + println( "It. "+i+" ("+times+"), Error 2D/3D/4D: " + (err2D, err3D, err4D) ) errHist(i) = ( i, err2D, err3D, err4D ) if( i < noIt - 1 ) { @@ -254,10 +263,14 @@ object TestParallelANN { sc.stop + val stoptime = Calendar.getInstance().getTime() + for( i <- 0 to noIt - 1 ) { println( errHist(i) ) } + println( formatter.format( starttime )+"-" + formatter.format( stoptime ) + " "+(stoptime.getTime-starttime.getTime+500)/1000+" seconds" ) + } } From 149a726012f1479537792bd54660eba3c6cd6812 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 21 Aug 2014 14:37:43 +0800 Subject: [PATCH 018/143] Update TestParallelANNgraphics.scala Cleaned code --- .../mllib/ann/TestParallelANNgraphics.scala | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala index 682bb7cd03ab8..e206a8b7072a3 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala @@ -63,29 +63,29 @@ class OutputCanvas2D( wd: Int, ht: Int ) extends Canvas { override def paint( g: Graphics) = { - var xmax: Double = 0.0 - var xmin: Double = 0.0 - var ymax: Double = 0.0 - var ymin: Double = 0.0 + var xmax: Double = 0.0 + var xmin: Double = 0.0 + var ymax: Double = 0.0 + var ymin: Double = 0.0 - if( points!=null ) { + if( points!=null ) { - g.setColor( Color.black ) - val x = points.map( T => (T.toArray)(0) ) - val y = points.map( T => (T.toArray)(1) ) + g.setColor( Color.black ) + val x = points.map( T => (T.toArray)(0) ) + val y = points.map( T => (T.toArray)(1) ) - xmax = x.max - xmin = x.min - ymax = y.max - ymin = y.min + xmax = x.max + xmin = x.min + ymax = y.max + ymin = y.min - for( i <- 0 to x.size - 1 ) { + for( i <- 0 to x.size - 1 ) { - val xr = (((x(i).toDouble - xmin)/(xmax - xmin))*wd + .5).toInt - val yr = (((y(i).toDouble - ymin)/(ymax - ymin))*ht + .5).toInt - plotDot( g, xr, yr ) + val xr = (((x(i).toDouble - xmin)/(xmax - xmin))*wd + .5).toInt + val yr = (((y(i).toDouble - ymin)/(ymax - ymin))*ht + .5).toInt + plotDot( g, xr, yr ) - } + } if( approxPoints != null ) { From ace988ebccd2d1b25b9f24bf917ee81853264395 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Fri, 22 Aug 2014 15:03:08 +0800 Subject: [PATCH 019/143] Create mllib-ann.md Documentation for Artificial Neural Network (ANN) --- docs/mllib-ann.md | 179 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) 
create mode 100644 docs/mllib-ann.md

diff --git a/docs/mllib-ann.md b/docs/mllib-ann.md
new file mode 100644
index 0000000000000..1b066d2af38ad
--- /dev/null
+++ b/docs/mllib-ann.md
@@ -0,0 +1,179 @@
+---
+layout: global
+title: Artificial Neural Networks - MLlib
+displayTitle: MLlib - Artificial Neural Networks
+---
+
+* Table of contents
+{:toc}
+
+### Introduction
+
+This document describes MLlib's Artificial Neural Network (ANN) implementation.
+
+The implementation currently consists of the following files:
+
+* 'ParallelANN.scala': implements the ANN
+* 'GeneralizedSteepestDescentAlgorithm.scala': provides an abstract class and model as a basis for 'ParallelANN'.
+
+In addition, there is a demo/test available:
+
+* 'TestParallelANN.scala': tests parallel ANNs for various functions
+* 'TestParallelANNgraphics.scala': graphical output for 'TestParallelANN.scala'
+
+### Architecture and Notation
+
+The file ParallelANN.scala implements a three-layer ANN with the following architecture:
+
+    +-------+
+    |       |
+    | X_0   |
+    |       |
+    +-------+          +-------+
+                       |       |
+    +-------+          | H_0   |          +-------+
+    |       |          |       |          |       |
+    | X_1   |-         +-------+        ->| O_0   |
+    |       | \  Vij            /         |       |
+    +-------+  -       +-------+ -        +-------+
+                \      |       |  / Wjk
+        :           ->| H_1   |-          +-------+
+        :              |       |          |       |
+        :              +-------+          | O_1   |
+        :                                 |       |
+        :          :                      +-------+
+        :          :
+        :          :                          :
+        :          :
+        :          :                      +-------+
+        :          :                      |       |
+        :          :                      | O_K-1 |
+        :                                 |       |
+        :              +-------+          +-------+
+        :              |       |
+        :              | H_J-1 |
+                       |       |
+    +-------+          +-------+
+    |       |
+    | X_I-1 |
+    |       |
+    +-------+
+
+    +-------+          +--------+
+    |       |          |        |
+    |  -1   |          |  -1    |
+    |       |          |        |
+    +-------+          +--------+
+
+INPUT LAYER          HIDDEN LAYER          OUTPUT LAYER
+
+The nodes '$X_0$' to '$X_{I-1}$' are the '$I$' input nodes. The nodes '$H_0$' to '$H_{J-1}$' are the '$J$' hidden nodes and the nodes '$O_0$' to '$O_{K-1}$' are the '$K$' output nodes. Between each input node '$X_i$' and hidden node '$H_j$' there is a weight '$V_{ij}$'. Likewise, between each hidden node '$H_j$' and each output node '$O_k$' is a weight '$W_{jk}$'.
+
+The ANN also implements two bias units. These are nodes that always output the value -1. The bias units are in the input and in the hidden layer. They act as normal nodes, except that the bias unit in the hidden layer has no input. The bias units can also be denoted by '$X_I$' and '$H_J$'.
+
+The value of a hidden node '$H_j$' is calculated as follows:
+
+'$H_j = g ( \sum_{i=0}^{I} X_i*V_{i,j} )$'
+
+Likewise, the value of the output node '$O_k$' is calculated as follows:
+
+'$O_k = g( \sum_{j=0}^{J} H_j*W_{j,k} )$'
+
+where '$g$' is the sigmoid function
+
+'$g(t) = \frac{e^{\beta t} }{1+e^{\beta t}}$'
+
+and '$\beta$' a parameter determining the steepness of the sigmoid.
+
+### Gradient descent
+
+Currently, MLlib uses gradient descent for training. This means that the weights '$V_{ij}$' and '$W_{jk}$' are updated by subtracting a fraction of the gradient of the following function:
+
+'$E = \sum_{k=0}^{K-1} (O_k - Y_k )^2$'
+
+where '$Y_k$' is the target output given inputs '$X_0$' ... '$X_{I-1}$'.
+
+Differentiation gives:
+
+'$\frac{\partial E}{\partial W_{jk}} = 2 (O_k-Y_k) \cdot H_j \cdot g' \left( \sum_{m=0}^{J} W_{mk} H_m \right)$'
+
+and
+
+'$\frac{\partial E}{\partial V_{ij}} = 2 \sum_{k=0}^{K-1} \left( (O_k - Y_k) \cdot X_i \cdot W_{jk} \cdot g'\left( \sum_{n=0}^{J} W_{nk} H_n \right) g'\left( \sum_{m=0}^{I} V_{mj} X_i \right) \right)$'
+
+The training step consists of the two operations
+
+'$V_{ij} = V_{ij} - \epsilon \frac{\partial E}{\partial V_{ij}}$'
+
+and
+
+'$W_{jk} = W_{jk} - \epsilon \frac{\partial E}{\partial W_{jk}}$'
+
+where '$\epsilon$' is the step size.
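+
+Note that for this sigmoid '$g'(t) = \beta g(t) ( 1 - g(t) )$', which is the identity the implementation's 'dg' function uses when evaluating the derivatives above.
+
+To illustrate the update rule, here is a minimal standalone sketch of one steepest descent step (not the MLlib code itself; the names 'descentStep', 'gradient' and 'eps' are made up for this example):
+
+```
+// One steepest-descent step over a flat weight vector.
+// `gradient` is assumed to return dE/dw in the same layout as `weights`.
+def descentStep(
+    weights: Array[Double],
+    gradient: Array[Double] => Array[Double],
+    eps: Double): Array[Double] = {
+  val grad = gradient(weights)
+  // w := w - eps * dE/dw for every weight
+  weights.zip(grad).map { case (w, dw) => w - eps * dw }
+}
+```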
+
+### Implementation Details
+
+## The 'ParallelANN' class
+
+The 'ParallelANN' class is the main class of the ANN. This class uses a trait 'ANN', which includes functions for calculating the hidden layer ('computeHidden') and for calculating the output ('computeValues'). The output of 'computeHidden' includes the bias node in the hidden layer, so that calling code does not need to handle the hidden bias node differently.
+
+The 'ParallelANN' class has the following constructors:
+
+'ParallelANN( stepSize, numIterations, miniBatchFraction, noInput, noHidden, noOutput, beta )'
+'ParallelANN()': assumes 'stepSize'=1.0, 'numIterations'=100, 'miniBatchFraction'=1.0, 'noInput'=1, 'noHidden'=5, 'noOutput'=1, 'beta'=1.0.
+'ParallelANN( noHidden )': as 'ParallelANN()', but allows specification of 'noHidden'
+'ParallelANN( noInput, noHidden )': as 'ParallelANN()', but allows specification of number of 'noInput' and 'noHidden'
+'ParallelANN( noInput, noHidden, noOutput )': as 'ParallelANN()', but allows specification of 'noInput', 'noHidden' and 'noOutput'
+
+The number of input nodes '$I$' is stored in the variable 'noInput', the number of hidden nodes '$J$' is stored in 'noHidden' and the number of output nodes '$K$' is stored in 'noOutput'. 'beta' contains the value of '$\beta$' for the sigmoid function.
+
+The parameters 'stepSize', 'numIterations' and 'miniBatchFraction' are used by the Stochastic Gradient Descent optimiser.
+
+In addition, it has a single vector 'weights' corresponding to $V_{ij}$ and $W_{jk}$. The mapping of '$V_{ij}$' and '$W_{jk}$' into 'weights' is as follows:
+
+'$V_{ij}$' -> 'weights[ i + j*(noInput+1) ]'
+
+'$W_{jk}$' -> 'weights[ (noInput+1)*noHidden + j + k*(noHidden+1) ]'
+
+The training function is called 'train'. It can take various inputs:
+
+'def train( rdd: RDD[(Vector,Vector)] )': starts a complete new training session and generates a new ANN.
+'def train( rdd: RDD[(Vector,Vector)], model: ParallelANNModel )': continues a training session with an existing ANN.
+'def train( rdd: RDD[(Vector,Vector)], weights: Vector )': starts a training session using initial weights as indicated by 'weights'.
+
+The input of the training function is an RDD with (input/output) training pairs, each input and output being stored as a 'Vector'. The training function returns an object of class 'ParallelANNModel', as described below.
+
+## The 'ParallelANNModel' class
+
+All information needed for the ANN is stored in the 'ParallelANNModel' class. The training function 'train' from 'ParallelANN' returns an object from the 'ParallelANNModel' class.
+
+The information in 'ParallelANNModel' consists of the weights, the number of input, hidden and output nodes, and two functions, 'predictPoint' and 'predictPointV'.
+
+The 'predictPoint' function is used to calculate a single output value as a 'Double'. If the output of the ANN actually is a vector, it returns just the first element of the vector, that is '$O_{0}$'. The output of 'predictPointV' is of type 'Vector' and contains all '$K$' output values.
+
+## The 'GeneralizedSteepestDescentAlgorithm' class
+
+The 'GeneralizedSteepestDescentAlgorithm' class is based on the 'GeneralizedLinearAlgorithm' class. The main difference is that 'GeneralizedSteepestDescentAlgorithm' works with output values of type 'Vector', whereas 'GeneralizedLinearAlgorithm' works with output values of type 'Double'. The new class was needed because an ANN ideally outputs multiple values, hence a 'Vector'.
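+
+A hypothetical end-to-end session (a sketch only; 'sc' is an existing 'SparkContext' and the training pairs below are made up) could look as follows:
+
+```
+import org.apache.spark.mllib.ann.ParallelANN
+import org.apache.spark.mllib.linalg.{Vector, Vectors}
+
+// Training pairs: two inputs and one output per example.
+// Outputs must lie in [0,1], since training checks them against the sigmoid's range.
+val trainingRDD = sc.parallelize(Seq(
+  (Vectors.dense(0.0, 0.0), Vectors.dense(0.1)),
+  (Vectors.dense(1.0, 1.0), Vectors.dense(0.9))
+))
+
+val ann = new ParallelANN(2, 5) // noInput = 2, noHidden = 5
+ann.optimizer.setNumIterations(1000).setStepSize(1.0)
+
+val model = ann.train(trainingRDD) // returns a ParallelANNModel
+val estimate: Vector = model.predictV(Vectors.dense(0.5, 0.5))
+```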
+
+## Training
+
+Many different strategies exist for training an ANN, so it is important that the optimising function used by MLlib's ANN is interchangeable. The ParallelANN class has a variable 'optimizer', which is currently set to a 'GradientDescent' optimising class. The 'GradientDescent' class implements a stochastic gradient descent method and is also used by other learning algorithms in Spark. When other optimisers become available for Spark, they can be stored in the 'optimizer' variable instead.
+
+### Demo/test
+
+Usage of MLlib's ANN is demonstrated through the 'TestParallelANN' demo program. The program generates training data from three functions:
+
+* f2d: x -> y
+* f3d: (x,y) -> z
+* f4d: t -> (x,y,z)
+
+When the program is given the command line argument 'graph', it will show a graphical representation of the target function and the latest approximation.
+
+### Conclusion
+
+The 'ParallelANN' class implements an Artificial Neural Network (ANN), using the stochastic gradient descent method. It takes as input an RDD of input/output values of type 'Vector', and returns an object of type 'ParallelANNModel' containing the parameters of the trained ANN. The 'ParallelANNModel' object can also be used to calculate results after training.
+
+The training of an ANN can be interrupted and later continued, allowing intermediate inspection of the results.
+
+A demo program for ANN is provided.

From 9f75f59c9c884083eae210aeb980c2a83b1a125b Mon Sep 17 00:00:00 2001
From: Bert Greevenbosch
Date: Fri, 22 Aug 2014 15:23:14 +0800
Subject: [PATCH 020/143] Update mllib-ann.md

Edit layout to make it more readable

---
 docs/mllib-ann.md | 65 +++++++++++++++++++++++------------------------
 1 file changed, 32 insertions(+), 33 deletions(-)

diff --git a/docs/mllib-ann.md b/docs/mllib-ann.md
index 1b066d2af38ad..70d35978f8a6c 100644
--- a/docs/mllib-ann.md
+++ b/docs/mllib-ann.md
@@ -24,7 +24,7 @@ In addition, there is a demo/test available:
 
 The file ParallelANN.scala implements a three-layer ANN with the following architecture:
-
+```
     +-------+
     |       |
     | X_0   |
     |       |
     +-------+          +-------+
                        |       |
@@ -66,51 +66,51 @@ The file ParallelANN.scala implements a three-layer ANN with the following archi
     +-------+          +--------+
 
 INPUT LAYER          HIDDEN LAYER          OUTPUT LAYER
+```
+The nodes X_0 to X_{I-1} are the I input nodes. The nodes H_0 to H_{J-1} are the J hidden nodes and the nodes O_0 to O_{K-1} are the K output nodes. Between each input node X_i and hidden node H_j there is a weight V_{ij}. Likewise, between each hidden node H_j and each output node O_k is a weight W_{jk}.
-The nodes '$X_0$' to '$X_{I-1}$' are the '$I$' input nodes. The nodes '$H_0$' to '$H_{J-1}$' are the '$J$' hidden nodes and the nodes '$O_0$' to '$O_{K-1}$' are the '$K$' output nodes. Between each input node '$X_i$' and hidden node '$H_j$' there is a weight '$V_{ij}$'. Likewise, between each hidden node '$H_j$' and each output node '$O_k$' is a weight '$W_{jk}$'.
 
-The ANN also implements two bias units. These are nodes that always output the value -1. The bias units are in the input and in the hidden layer. They act as normal nodes, except that the bias unit in the hidden layer has no input. The bias units can also be denoted by '$X_I$' and '$H_J$'.
+The ANN also implements two bias units. These are nodes that always output the value -1. The bias units are in the input and in the hidden layer. They act as normal nodes, except that the bias unit in the hidden layer has no input. The bias units can also be denoted by X_I and H_J.
-The value of a hidden node '$H_j$' is calculated as follows:
+The value of a hidden node H_j is calculated as follows:

-'$H_j = g ( \sum_{i=0}^{I} X_i*V_{i,j} )$'
+`$H_j = g ( \sum_{i=0}^{I} X_i*V_{i,j} )$`

-Likewise, the value of the output node '$O_k$' is calculated as follows:
+Likewise, the value of the output node O_k is calculated as follows:

-'$O_k = g( \sum_{j=0}^{J} H_j*W_{j,k} )$'
+`$O_k = g( \sum_{j=0}^{J} H_j*W_{j,k} )$`

-Where '$g$' is the sigmod function
+Where g is the sigmoid function

-'$g(t) = \frac{e^{\beta t} }{1+e^{\beta t}}$'
+`$g(t) = \frac{e^{\beta t} }{1+e^{\beta t}}$`

-and '$\beta' the learning rate.
+and `$\beta` the learning rate.

### Gradient descent

-Currently, the MLLIB uses gradent descent for training. This means that the weights '$V_{ij}$' and '$W_{jk}$' are updated by adding a fraction of the gradient to '$V_{ij}$' and '$W_{jk}$' of the following function:
+Currently, the MLLIB uses gradient descent for training. This means that the weights V_{ij} and W_{jk} are updated by adding a fraction of the gradient of the following function to V_{ij} and W_{jk}:

-'$E = \sum_{k=0}^{K-1} (O_k - Y_k )^2$'
+`$E = \sum_{k=0}^{K-1} (O_k - Y_k )^2$`

-where '$Y_k$' is the target output given inputs '$X_0$' ... '$X_{I-1}$'
+where Y_k is the target output given inputs X_0 ... X_{I-1}

Working out the derivatives gives:

-'$\frac{\partial E}{\partial W_{jk}} = 2 (O_k-Y_k) \cdot H_j \cdot g' \left( \sum_{m=0}^{J} W_{mk} H_m \right)$'
+`$\frac{\partial E}{\partial W_{jk}} = 2 (O_k-Y_k) \cdot H_j \cdot g' \left( \sum_{m=0}^{J} W_{mk} H_m \right)$`

and

-'$\frac{\partial E}{\partial V_{ij}} = 2 \sum_{k=0}^{K-1} \left( (O_k - Y_k) \cdot X_i \cdot W_{jk} \cdot g'\left( \sum_{n=0}^{J} W_{nk} H_n \right) g'\left( \sum_{m=0}^{I} V_{mj} X_i \right) \right)$'
+`$\frac{\partial E}{\partial V_{ij}} = 2 \sum_{k=0}^{K-1} \left( (O_k - Y_k) \cdot X_i \cdot W_{jk} \cdot g'\left( \sum_{n=0}^{J} W_{nk} H_n \right) g'\left( \sum_{m=0}^{I} V_{mj} X_m \right) \right)$`

The training step consists of the two operations

-'$V_{ij} = V_{ij} - \epsilon \frac{\partial E}{\partial V_{ij}}$'
+`$V_{ij} = V_{ij} - \epsilon \frac{\partial E}{\partial V_{ij}}$`

and

-'$W_{jk} = W_{jk} - \epsilon \frac{\partial E}{\partial W_{jk}}$'
+`$W_{jk} = W_{jk} - \epsilon \frac{\partial E}{\partial W_{jk}}$`

-where '$\epsilon$' is the step size.
+where `$\epsilon$` is the step size.

### Implementation Details

## The 'ParallelANN' class

@@ -120,27 +120,26 @@ The 'ParallelANN' class is the main class of the ANN. This class uses a trait 'A

The 'ParallelANN' class has the following constructors:

-'ParallelANN( stepSize, numIterations, miniBatchFraction, noInput, noHidden, noOutput, beta )'
-'ParallelANN()': assumes 'stepSize'=1.0, 'numIterations'=100, 'miniBatchFraction'=1.0, 'noInput'=1, 'noHidden'=5, 'noOutput'=1, 'beta'=1.0.
-'ParallelANN( noHidden )': as 'ParallelANN()', but allows specification of 'noHidden'
-'ParallelANN( noInput, noHidden )': as 'ParallelANN()', but allows specification of 'noInput' and 'noHidden'
-'ParallelANN( noInput, noHidden, noOutput )': as 'ParallelANN()', but allows specification of 'noInput', 'noHidden' and 'noOutput'
+* `ParallelANN( stepSize, numIterations, miniBatchFraction, noInput, noHidden, noOutput, beta )`
+* `ParallelANN()`: assumes 'stepSize'=1.0, 'numIterations'=100, 'miniBatchFraction'=1.0, 'noInput'=1, 'noHidden'=5, 'noOutput'=1, 'beta'=1.0.
+* `ParallelANN( noHidden )`: as 'ParallelANN()', but allows specification of 'noHidden'
+* `ParallelANN( noInput, noHidden )`: as 'ParallelANN()', but allows specification of 'noInput' and 'noHidden'
+* `ParallelANN( noInput, noHidden, noOutput )`: as 'ParallelANN()', but allows specification of 'noInput', 'noHidden' and 'noOutput'

-The number of input nodes '$I$' is stored in the variable 'noInput', the number of hidden nodes '$J$' is stored in 'noHidden' and the number of output nodes '$K$' is stored in 'noOutput'. 'beta' contains the value of '$\beta$' for the sigmoid function.
+The number of input nodes I is stored in the variable 'noInput', the number of hidden nodes J is stored in 'noHidden' and the number of output nodes K is stored in 'noOutput'. 'beta' contains the value of '$\beta$' for the sigmoid function.

The parameters 'stepSize', 'numIterations' and 'miniBatchFraction' are used by the Stochastic Gradient Descent optimiser.

-In addition, it has a single vector 'weights' corresponding to $V_{ij}$ and $W_{jk}$. The mapping of '$V_{ij}$' and '$W_{jk}$' into 'weights' is as follows:
-
-'$V_{ij}$' -> 'weights[ i + j*(noInput+1) ]$'
+In addition, it has a single vector 'weights' corresponding to V_{ij} and W_{jk}. The mapping of V_{ij} and W_{jk} into 'weights' is as follows:

-'$W_{jk}$' -> 'weights[ (noInput+1)*noHidden + j + k*(noHidden+1) ]$'
+* V_{ij} -> `weights[ i + j*(noInput+1) ]$`
+* W_{jk} -> `weights[ (noInput+1)*noHidden + j + k*(noHidden+1) ]$`

The training function is called 'train'. It can take various inputs:

-'def train( rdd: RDD[(Vector,Vector)] )': starts a completely new training session and generates a new ANN.
-'def train( rdd: RDD[(Vector,Vector)], model: ParallelANNModel )': continues a training session with an existing ANN.
-'def train( rdd: RDD[(Vector,Vector)], weights: Vector )': starts a training session using the initial weights indicated by 'weights'.
+* `def train( rdd: RDD[(Vector,Vector)] )`: starts a completely new training session and generates a new ANN.
+* `def train( rdd: RDD[(Vector,Vector)], model: ParallelANNModel )`: continues a training session with an existing ANN.
+* `def train( rdd: RDD[(Vector,Vector)], weights: Vector )`: starts a training session using the initial weights indicated by 'weights'.

The input of the training function is an RDD of (input/output) training pairs, each input and output being stored as a 'Vector'. The training function returns an object of class 'ParallelANNModel', as described below.

@@ -150,7 +149,7 @@ All information needed for the ANN is stored in the 'ParallelANNModel' class. Th

The information in 'ParallelANNModel' consists of the weights, the number of input, hidden and output nodes, as well as two functions 'predictPoint' and 'predictPointV'.

-The 'predictPoint' function calculates a single output value as a 'Double'. If the output of the ANN is actually a vector, it returns just the first element of that vector, that is '$O_{0}$'. The output of 'predictPointV' is of type 'Vector', containing all '$K$' output values.
+The 'predictPoint' function calculates a single output value as a 'Double'. If the output of the ANN is actually a vector, it returns just the first element of that vector, that is O_{0}. The output of 'predictPointV' is of type 'Vector', containing all K output values.
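A hedged usage sketch of the two prediction paths (variable names are ours; 'predict' and 'predictV' are the public wrappers around 'predictPoint' and 'predictPointV'):

```scala
// Query a trained model either for the first output only, or for all K outputs.
val model: ParallelANNModel = parallelANN.train(trainingRDD)
val firstOutput: Double = model.predict(Vectors.dense(0.5))   // O_0 only
val allOutputs: Vector = model.predictV(Vectors.dense(0.5))   // O_0 .. O_{K-1}
```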
## The 'GeneralizedSteepestDescentAlgorithm' class

From c81de0c90eb50d8d3f0a6d59f534e05e2308810d Mon Sep 17 00:00:00 2001
From: Bert Greevenbosch
Date: Fri, 22 Aug 2014 15:29:42 +0800
Subject: [PATCH 021/143] Update mllib-ann.md

Edited font size headers

---
 docs/mllib-ann.md | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/docs/mllib-ann.md b/docs/mllib-ann.md
index 70d35978f8a6c..b2b3c91314c7d 100644
--- a/docs/mllib-ann.md
+++ b/docs/mllib-ann.md
@@ -1,12 +1,12 @@ layout: global
-title: Linear Methods - MLlib
-displayTitle: MLlib - Linear Methods
+title: Artificial Neural Networks - MLlib
+displayTitle: MLlib - Artificial Neural Networks
 ---

 * Table of contents
 {:toc}

-### Introduction
+# Introduction

This document describes the MLLIB's Artificial Neural Network (ANN) implementation.

@@ -20,7 +20,7 @@ In addition, there is a demo/test available:
 * 'TestParallelANN.scala': tests parallel ANNs for various functions
 * 'TestParallelANNgraphics.scala': graphical output for 'TestParallelANN.scala'

-### Architecture and Notation
+# Architecture and Notation

The file ParallelANN.scala implements a three-layer ANN with the following architecture:

@@ -86,7 +86,7 @@ and `$\beta` the learning rate.

-### Gradient descent
+# Gradient descent

Currently, the MLLIB uses gradient descent for training. This means that the weights V_{ij} and W_{jk} are updated by adding a fraction of the gradient of the following function to V_{ij} and W_{jk}:

`$E = \sum_{k=0}^{K-1} (O_k - Y_k )^2$`

@@ -112,7 +112,7 @@ and

where `$\epsilon$` is the step size.

-### Implementation Details
+# Implementation Details

## The 'ParallelANN' class

@@ -159,7 +159,7 @@ The 'GeneralizedSteepestDescentAlgorithm' class is based on the 'GeneralizedLine

Science has provided many different strategies to train an ANN. Hence it is important that the optimising functions in MLLIB's ANN are interchangeable. The ParallelANN class has a variable 'optimizer', which is currently set to a 'GradientDescent' optimising class. The 'GradientDescent' optimising class implements a stochastic gradient descent method, and is also used for other optimisation technologies in Spark. It is expected that other optimising functions will be defined for Spark, and these can be stored in the 'optimizer' variable.

-### Demo/test
+# Demo/test

Usage of MLLIB's ANN is demonstrated through the 'TestParallelANN' demo program. The program generates three functions:

@@ -169,7 +169,7 @@ When the program is given the Java argument 'graph', it will show a graphical representation of the target function and the latest values.

-### Conclusion
+# Conclusion

The 'ParallelANN' class implements an Artificial Neural Network (ANN), using the stochastic gradient descent method. It takes as input an RDD of input/output values of type 'Vector', and returns an object of type 'ParallelANNModel' containing the parameters of the trained ANN. The 'ParallelANNModel' object can also be used to calculate results after training.
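The interrupt-and-continue workflow mentioned in the conclusion can be sketched as follows (hedged: the names are illustrative, and the 'train' overloads are the ones listed under Implementation Details):

```scala
// Train, inspect the intermediate error, then resume training from the same model.
var model = parallelANN.train(trainingRDD)        // new session, random initial weights
val mse = validationRDD.map { case (x, y) =>
  val diff = model.predictV(x)(0) - y(0)
  diff * diff
}.mean()
model = parallelANN.train(trainingRDD, model)     // continue with the existing ANN
```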
From 3c456b57ac486aa016b8f692d375a7769092f8ab Mon Sep 17 00:00:00 2001
From: Bert Greevenbosch
Date: Fri, 22 Aug 2014 15:33:15 +0800
Subject: [PATCH 022/143] Update mllib-ann.md

---
 docs/mllib-ann.md | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/docs/mllib-ann.md b/docs/mllib-ann.md
index b2b3c91314c7d..a653724c06997 100644
--- a/docs/mllib-ann.md
+++ b/docs/mllib-ann.md
@@ -1,14 +1,12 @@
+---
 layout: global
 title: Artificial Neural Networks - MLlib
 displayTitle: MLlib - Artificial Neural Networks
 ---

-* Table of contents
-{:toc}
-
 # Introduction

-This document describes the MLLIB's Artificial Neural Network (ANN) implementation.
+This document describes MLlib's Artificial Neural Network (ANN) implementation.

The implementation currently consists of the following files:

@@ -86,7 +86,7 @@ and `$\beta` the learning rate.

 # Gradient descent

-Currently, the MLLIB uses gradient descent for training. This means that the weights V_{ij} and W_{jk} are updated by adding a fraction of the gradient of the following function to V_{ij} and W_{jk}:
+Currently, MLlib uses gradient descent for training. This means that the weights V_{ij} and W_{jk} are updated by adding a fraction of the gradient of the following function to V_{ij} and W_{jk}:

`$E = \sum_{k=0}^{K-1} (O_k - Y_k )^2$`

@@ -157,11 +155,11 @@ The 'GeneralizedSteepestDescentAlgorithm' class is based on the 'GeneralizedLine

 ## Training

-Science has provided many different strategies to train an ANN. Hence it is important that the optimising functions in MLLIB's ANN are interchangeable. The ParallelANN class has a variable 'optimizer', which is currently set to a 'GradientDescent' optimising class. The 'GradientDescent' optimising class implements a stochastic gradient descent method, and is also used for other optimisation technologies in Spark. It is expected that other optimising functions will be defined for Spark, and these can be stored in the 'optimizer' variable.
+Science has provided many different strategies to train an ANN. Hence it is important that the optimising functions in MLlib's ANN are interchangeable. The ParallelANN class has a variable 'optimizer', which is currently set to a 'GradientDescent' optimising class. The 'GradientDescent' class implements a stochastic gradient descent method, and is also used for other optimisation tasks in Spark. It is expected that other optimising functions will be defined for Spark, and these can be stored in the 'optimizer' variable.

 # Demo/test

-Usage of MLLIB's ANN is demonstrated through the 'TestParallelANN' demo program. The program generates three functions:
+Usage of MLlib's ANN is demonstrated through the 'TestParallelANN' demo program. The program generates three functions:

 * f2d: x -> y
 * f3d: (x,y) -> z

From 3807e7345bcb3a0dd5bc7ec55a35b91929b57cf4 Mon Sep 17 00:00:00 2001
From: Bert Greevenbosch
Date: Fri, 22 Aug 2014 15:35:54 +0800
Subject: [PATCH 023/143] Update mllib-ann.md

---
 docs/mllib-ann.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/mllib-ann.md b/docs/mllib-ann.md
index a653724c06997..975a9523be82e 100644
--- a/docs/mllib-ann.md
+++ b/docs/mllib-ann.md
@@ -147,7 +147,7 @@ All information needed for the ANN is stored in the 'ParallelANNModel' class. Th

The information in 'ParallelANNModel' consists of the weights, the number of input, hidden and output nodes, as well as two functions 'predictPoint' and 'predictPointV'.
-The 'predictPoint' function calculates a single output value as a 'Double'. If the output of the ANN is actually a vector, it returns just the first element of that vector, that is O_{0}. The output of 'predictPointV' is of type 'Vector', containing all K output values.
+The 'predictPoint' function calculates a single output value as a 'Double'. If the output of the ANN is actually a vector, it returns just the first element of that vector, that is O_0. The output of 'predictPointV' is of type 'Vector', containing all K output values.

## The 'GeneralizedSteepestDescentAlgorithm' class

From 5236a9df1aa0fa662ecaf0ca779874e1895908af Mon Sep 17 00:00:00 2001
From: Bert Greevenbosch
Date: Fri, 22 Aug 2014 15:44:00 +0800
Subject: [PATCH 024/143] Update mllib-ann.md

---
 docs/mllib-ann.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docs/mllib-ann.md b/docs/mllib-ann.md
index 975a9523be82e..005a5be4987d9 100644
--- a/docs/mllib-ann.md
+++ b/docs/mllib-ann.md
@@ -82,7 +82,7 @@ Where g is the sigmoid function

`$g(t) = \frac{e^{\beta t} }{1+e^{\beta t}}$`

-and `$\beta` the learning rate.
+and `$\beta$` defines the steepness of g.

 # Gradient descent

@@ -124,14 +124,14 @@ The 'ParallelANN' class has the following constructors:
 * `ParallelANN( noInput, noHidden )`: as 'ParallelANN()', but allows specification of 'noInput' and 'noHidden'
 * `ParallelANN( noInput, noHidden, noOutput )`: as 'ParallelANN()', but allows specification of 'noInput', 'noHidden' and 'noOutput'

-The number of input nodes I is stored in the variable 'noInput', the number of hidden nodes J is stored in 'noHidden' and the number of output nodes K is stored in 'noOutput'. 'beta' contains the value of '$\beta$' for the sigmoid function.
+The number of input nodes I is stored in the variable 'noInput', the number of hidden nodes J is stored in 'noHidden' and the number of output nodes K is stored in 'noOutput'. 'beta' contains the value of `$\beta$` for the sigmoid function.

The parameters 'stepSize', 'numIterations' and 'miniBatchFraction' are used by the Stochastic Gradient Descent optimiser.

In addition, it has a single vector 'weights' corresponding to V_{ij} and W_{jk}. The mapping of V_{ij} and W_{jk} into 'weights' is as follows:

-* V_{ij} -> `weights[ i + j*(noInput+1) ]$`
-* W_{jk} -> `weights[ (noInput+1)*noHidden + j + k*(noHidden+1) ]$`
+* V_{ij} -> `weights[ i + j*(noInput+1) ]`
+* W_{jk} -> `weights[ (noInput+1)*noHidden + j + k*(noHidden+1) ]`

The training function is called 'train'. It can take various inputs:

From 443ea7e37ce77af27037f5f46488a50cc70dd6f1 Mon Sep 17 00:00:00 2001
From: Bert Greevenbosch
Date: Tue, 2 Sep 2014 09:40:30 +0800
Subject: [PATCH 025/143] Update and rename GeneralizedSteepestDescentAlgorithm.scala to GeneralizedModel.scala

Make the model more general, as it can be used for algorithms other than steepest descent too.
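As a hedged outline of the shape this rename gives the code (the full diff follows; this is only the abstraction, not the complete file):

```scala
// An abstract model parameterized only by its weights; prediction is left to
// concrete subclasses such as ParallelANNModel, so any optimizer can produce one.
abstract class GeneralizedModel(val weights: Vector) extends Serializable {
  protected def predictPoint(dataMatrix: Vector, weightMatrix: Vector): Double
  def predict(testData: Vector): Double = predictPoint(testData, weights)
}
```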
--- ...Algorithm.scala => GeneralizedModel.scala} | 25 ++++++------------- 1 file changed, 7 insertions(+), 18 deletions(-) rename mllib/src/main/scala/org/apache/spark/mllib/ann/{GeneralizedSteepestDescentAlgorithm.scala => GeneralizedModel.scala} (83%) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescentAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedModel.scala similarity index 83% rename from mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescentAlgorithm.scala rename to mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedModel.scala index 3bc499fa10270..ba3e31ae9d6da 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescentAlgorithm.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedModel.scala @@ -29,13 +29,13 @@ import breeze.linalg.{SparseVector => BSV} /** * :: DeveloperApi :: - * GeneralizedSteepestDescendModel represents a model trained using - * GeneralizedSteepestDescendAlgorithm. + * GeneralizedModel represents a model trained using + * GeneralizedAlgorithm. * * @param weights Weights computed for every feature. */ @DeveloperApi -abstract class GeneralizedSteepestDescentModel(val weights: Vector ) +abstract class GeneralizedModel(val weights: Vector ) extends Serializable { @@ -123,12 +123,11 @@ abstract class GeneralizedSteepestDescentModel(val weights: Vector ) /** * :: DeveloperApi :: - * GeneralizedSteepestDescend implements methods to train a function using - * the Steepest Descend algorithm. - * This class should be extended with an Optimizer to create a new GLM. + * GeneralizedAlgorithm implements methods to train a function. + * This class should be extended with an Optimizer to create a new GM. */ @DeveloperApi -abstract class GeneralizedSteepestDescentAlgorithm[M <: GeneralizedSteepestDescentModel] +abstract class GeneralizedAlgorithm[M <: GeneralizedModel] extends Logging with Serializable { /** The optimizer to solve the problem. */ @@ -139,19 +138,9 @@ abstract class GeneralizedSteepestDescentAlgorithm[M <: GeneralizedSteepestDesce */ protected def createModel(weights: Vector): M - /** Prepends one to the input vector. */ - private def prependOne(vector: Vector): Vector = { - val vector1 = vector.toBreeze match { - case dv: BDV[Double] => BDV.vertcat(BDV.ones[Double](1), dv) - case sv: BSV[Double] => BSV.vertcat(new BSV[Double](Array(0), Array(1.0), 1), sv) - case v: Any => throw new IllegalArgumentException("Do not support vector type " + v.getClass) - } - Vectors.fromBreeze(vector1) - } - /** * Run the algorithm with the configured parameters on an input RDD - * of LabeledPoint entries starting from the initial weights provided. + * of (Vector,Vector) entries starting from the initial weights provided. */ def run(input: RDD[(Vector,Vector)], initialWeights: Vector): M = { From 3466f9523b63b6427c65dbf0d0210b346ba998d9 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Tue, 2 Sep 2014 09:48:20 +0800 Subject: [PATCH 026/143] Update ParallelANN.scala Updated the code to implement true back-propagation Thanks to Alexander Ulanov (avulanov) for implementing true back-propagation in his repository first. This code borrows extensively from his code, and uses the same back-propagation algorithm (save for using arrays rather than matrices/vectors) and "layers" vector (here called "ontology"). 
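For reference, the back-propagation recursion that the updated gradient code implements can be summarised as follows (our summary, not text from the patch; it matches the code's halved squared error and the sigmoid derivative g'(s) = g(s)(1-g(s)); N_jl denotes the activated value of node j in layer l, and the "layers" vector mentioned above appears in the code as the 'topology' array):

`$E = \frac{1}{2} \sum_j (O_j - Y_j)^2$`

`$\delta_{jL} = (O_j - Y_j) \cdot O_j (1 - O_j)$`

`$\delta_{jl} = N_{jl} (1 - N_{jl}) \sum_i W_{ji(l+1)} \delta_{i(l+1)}$`

`$\frac{\partial E}{\partial W_{ijl}} = N_{i(l-1)} \delta_{jl} \qquad \frac{\partial E}{\partial B_{jl}} = \delta_{jl}$`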
--- .../apache/spark/mllib/ann/ParallelANN.scala | 532 ++++++++++-------- 1 file changed, 301 insertions(+), 231 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala index d1a31f2c598a3..789854da4d32a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala @@ -34,159 +34,168 @@ import org.apache.spark.util.random.XORShiftRandom /* * Implements a Artificial Neural Network (ANN) * - * format of data: - * data[ 0..noInput-1 ]: Input - * data[ noInput..noInput+noOutput-1 ]: Output + * The data consists of an input vector and an output vector, combined into a single vector + * as follows: + * + * [ ---input--- ---output--- ] + * + * NOTE: output values should be in the range [0,1] + * + * For a network of L layers: + * + * topology( l ) indicates the number of nodes in layer l, excluding the bias node. + * + * noInput = topology(0), the number of input nodes + * noOutput = topology(L-1), the number of output nodes + * + * input = data( 0 to noInput-1 ) + * output = data( noInput to noInput+noOutput-1 ) + * + * W_ijl is the weight from node i in layer l-1 to node j in layer l + * W_ijl goes to position ofsWeight(l) + j*(topology(l-1)+1) + i in the weights vector + * + * B_jl is the bias input of node j in layer l + * B_jl goes to position ofsWeight(l) + j*(topology(l-1)+1) + topology(l-1) in the weights vector + * + * error function: E( O, Y ) = sum( O_j - Y_j ) + * (with O = (O_0, ..., O_(noOutput-1)) the output of the ANN, + * and (Y_0, ..., Y_(noOutput-1)) the input) + * + * node_jl is node j in layer l + * node_jl goes to position ofsNode(l) + j + * + * The weights gradient is defined as dE/dW_ijl and dE/dB_jl + * It has same mapping as W_ijl and B_jl + * + * For back propagation: + * delta_jl = dE/dS_jl, where S_jl the output of node_jl, but before applying the sigmoid + * delta_jl has the same mapping as node_jl + * + * Where E = ((estOutput-output),(estOutput-output)), + * the inner product of the difference between estimation and target output with itself. * */ -trait ANN { - - def noInput: Integer - def noHidden: Integer - def noOutput: Integer - def beta: Double - - def g( x: Double ) = (1/(1 + math.exp(-beta*x))) - def dg( x: Double ) = beta*g(x)*(1 - g(x)) +class ParallelANNModel private[mllib] ( + override val weights: Vector, + val topology: Array[Int] ) + extends GeneralizedModel(weights) with RegressionModel with Serializable { - /* returns the hidden layer including the -1 robonode! 
*/ - def computeHidden( data: Array[Double], weights: Array[Double] ): Array[Double] = { + val L = topology.length-1 - var arrHidden = new Array[Double]( noHidden + 1 ) + val ofsWeight: Array[Int] = { - for( j <- 0 to noHidden-1 ) { + var tmp = new Array[Int]( L + 1 ) - val start = j*(noInput + 1) - var v: Double = 0; - for( w <- 0 to noInput-1 ) - v = v + data(w)*weights( start + w ) - v = v - 1.0 * weights( start + noInput ) // robonode - arrHidden( j ) = g( v ) + var curPos = 0; + tmp( 0 ) = 0; + for( l <- 1 to L ) { + tmp( l ) = curPos + curPos = curPos + ( topology( l - 1 ) + 1 ) * ( topology( l ) ) } - arrHidden( noHidden ) = -1.0 - - arrHidden + tmp } - /* returns the hidden layer including the -1 robonode, as well as the final estimation */ - def computeValues( - data: Array[Double], - weights: Array[Double] ): - (Array[Double], Array[Double]) = { + def g( x: Double ) = 1.0 / (1.0 + math.exp( -x ) ) + + def computeValues( arrData: Array[Double], arrWeights: Array[Double] ): Array[Double] = { - var hidden = computeHidden( data, weights ) - var output = new Array[Double](noOutput) + var arrPrev = new Array[Double]( topology( 0 ) ) - for( k <- 0 to noOutput - 1 ) { - var tmp: Double = 0.0; - for( i <- 0 to noHidden ) - tmp = tmp + hidden(i)*weights( noHidden * ( noInput + 1 ) + k * ( noHidden + 1 ) + i ) - output(k) = g( tmp ) + for( i <- 0 until topology( 0 ) ) + arrPrev( i ) = arrData( i ) + for( l <- 1 to L ) { + val arrCur = new Array[Double]( topology( l ) ) + for( j <- 0 until topology( l ) ) { + var cum: Double = 0.0 + for( i <-0 until topology( l-1 ) ) + cum = cum + + arrPrev( i ) * arrWeights( ofsWeight( l ) + ( topology( l-1 ) + 1 ) * j + i ) + cum = cum + + arrWeights( ofsWeight( l ) + ( topology( l-1 ) + 1 )*j + topology( l-1 ) ) // bias + arrCur( j ) = g( cum ) + } + arrPrev = arrCur; } - ( hidden, output ) + arrPrev } -} - -class ParallelANNModel private[mllib] ( - override val weights: Vector, - val noInp: Integer, - val noHid: Integer, - val noOut: Integer, - val b: Double ) - extends GeneralizedSteepestDescentModel(weights) with RegressionModel with Serializable with ANN { - - val noInput = noInp - val noHidden = noHid - val noOutput = noOut - val beta = b - override def predictPoint( data: Vector, weights: Vector ): Double = { - val outp = computeValues( data.toArray, weights.toArray )._2 + val outp = computeValues( data.toArray, weights.toArray ) outp(0) } def predictPointV( data: Vector, weights: Vector): Vector = { - Vectors.dense( computeValues( data.toArray, weights.toArray )._2 ) + Vectors.dense( computeValues( data.toArray, weights.toArray ) ) } } -class ParallelANN private ( - private var stepSize: Double, +class ParallelANN( + private var topology: Array[Int], private var numIterations: Int, - private var miniBatchFraction: Double, - private var noInput: Int, - private var noHidden: Int, - private var noOutput: Int, - private val beta: Double ) - extends GeneralizedSteepestDescentAlgorithm[ParallelANNModel] with Serializable { + private var stepSize: Double, + private var miniBatchFraction: Double ) + extends GeneralizedAlgorithm[ParallelANNModel] with Serializable { private val rand = new XORShiftRandom - private val gradient = new LeastSquaresGradientANN( noInput, noHidden, noOutput, beta ) + private val gradient = new LeastSquaresGradientANN( topology ) private val updater = new ANNUpdater() override val optimizer = new GradientDescent(gradient, updater) .setStepSize(stepSize) .setNumIterations(numIterations) .setMiniBatchFraction(miniBatchFraction) - def 
this() = { - this( 1.0, 100, 1.0, 1, 5, 1, 1.0 ) - } + val noWeights = { - def this( noHidden: Int ) = { - this( 1.0, 100, 1.0, 1, noHidden, 1, 1.0 ) - } + var tmp = 0 - def this( noInput: Int, noHidden: Int ) = { - this( 1.0, 100, 1.0, noInput, noHidden, 1, 1.0 ) - } + for( i<-1 until topology.size ) { + tmp = tmp + topology(i) * (topology(i-1) + 1) + } + + tmp - def this( noInput: Int, noHidden: Int, noOutput: Int ) = { - this( 1.0, 100, 1.0, noInput, noHidden, noOutput, 1.0 ) } - override protected def createModel(weights: Vector) = { - new ParallelANNModel( weights, noInput, noHidden, noOutput, beta ) + def this( topology: Array[Int] ) = { + this( topology, 100, 1.0, 1.0 ) } - def checkOutput( rdd: RDD[(Vector,Vector)] ) { - val oVals = rdd.flatMap( T => T._2.toArray ) - var omax = oVals.max - assert( omax <= 1 ) - var omin = oVals.min - assert( omin >= 0 ) + def this( noInput: Int, noHidden: Int, noOutput: Int ) = { + this( Array( noInput, noHidden, noOutput ) ) } - def randomDouble( i: Int ): Double = { - rand.nextDouble() + override protected def createModel( weights: Vector ) = { + new ParallelANNModel( weights, topology ) } def train( rdd: RDD[(Vector,Vector)] ): ParallelANNModel = { val ft = rdd.first() - assert( noInput == ft._1.size ) - assert( noOutput == ft._2.size ) + assert( topology( 0 ) == ft._1.size ) + assert( topology( topology.length-1 ) == ft._2.size ) - checkOutput( rdd ) + val initialWeightsArr = new Array[Double](noWeights) - val noWeights = (noInput + 1)*noHidden + (noHidden + 1)*noOutput + var pos = 0; - val initialWeightsArr = new Array[Double](noWeights) + for( l <- 1 until topology.length ) { + for( i <- 0 until ( topology( l ) * ( topology( l - 1 ) + 1 ) ) ) { + initialWeightsArr( pos ) = ( rand.nextDouble * 4.8 - 2.4 ) / ( topology( l - 1 ) + 1) + pos = pos + 1; + } + } - for( i <- 0 to (noInput + 1)*noHidden - 1 ) - initialWeightsArr( i ) = (randomDouble(i)*4.8 - 2.4)/(noInput + 1) - for( i <- 0 to (noHidden + 1)*noOutput - 1 ) - initialWeightsArr( (noInput + 1)*noHidden + i ) = (randomDouble(i)*4.8 - 2.4)/(noHidden + 1) + assert( pos == noWeights ) val initialWeights = Vectors.dense( initialWeightsArr ) @@ -195,190 +204,255 @@ class ParallelANN private ( } def train( rdd: RDD[(Vector,Vector)], model: ParallelANNModel ): ParallelANNModel = { + run( rdd, model.weights ) + } def train( rdd: RDD[(Vector,Vector)], weights: Vector ): ParallelANNModel = { val ft = rdd.first() - assert( noInput == ft._1.size ) - assert( noOutput == ft._2.size ) - assert( weights.size == (noInput + 1) * noHidden + (noHidden + 1) * noOutput ) + assert( weights.size == noWeights ) run( rdd, weights ); } } -/** - * data consists of input vector and output vector, and has the following form: - * - * [ ---input--- ---output--- ] - * - * where input = data( 0 to noInput-1 ) and output = data( noInput to noInput+noOutput-1 ) - * - * V_ij is the weight from input node i to hidden node j - * W_jk is the weight from hidden node j to output node k - * - * The weights have the following mapping: - * - * V_ij goes to position i + j*(noInput+1) - * W_jk goes to position (noInput+1)*noHidden + j + k*(noHidden+1) - * - * Gradient has same mapping, i.e. - * dE/dVij goes to i + j*(noInput+1) - * dE/dWjk goes to (noInput+1)*noHidden + j +k*(noHidden+1) - * - * Where E = ((estOutput-output),(estOutput-output)), - * the inner product of the difference between estimation and target output with itself. 
- */ - class LeastSquaresGradientANN( - noInp: Integer, - noHid: Integer, - noOut: Integer, - b: Double ) - extends Gradient with ANN { + topology: Array[Int] ) + extends Gradient { - val noInput = noInp - val noHidden = noHid - val noOutput = noOut - val beta = b + def g( x: Double ) = 1.0 / (1.0 + math.exp( -x ) ) - /* For verification only - private val rand = new XORShiftRandom - */ + val L = topology.length-1 - override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { + val noWeights = { - val arrData = data.toArray - val arrWeights = weights.toArray + var tmp = 0 + + for( i<-1 to L ) { + tmp = tmp + topology(i) * ( topology( i - 1 ) + 1 ) + } + + tmp + + } - var gradient = new Array[Double]( (noInput + 1) * noHidden + (noHidden + 1) * noOutput ) + val ofsWeight: Array[Int] = { - val (arrHidden, output) = computeValues( arrData, arrWeights ) - val arrEst = output + var tmp = new Array[Int]( L + 1 ) + var curPos = 0; - var diff = new Array[Double]( noOutput ) - var E: Double = 0.0 - for( i <-0 to noOutput-1 ) { - diff( i ) = arrEst( i ) - arrData( noInput.toInt + i ); - E = E + diff(i) * diff(i) + tmp( 0 ) = 0; + for( l <- 1 to L ) { + tmp( l ) = curPos + curPos = curPos + ( topology( l - 1 ) + 1 ) * ( topology( l ) ) } - /* - * The following fields are for verification only - val eps = .000001 - val testOneVOutOf = 5000; - val testOneWOutOf = 2500; - var arrWeights_tmp = weights.toArray - val warnErr = 5e-7 - */ + tmp + + } - /* Wjk */ - for( k <- 0 to noOutput - 1 ) { + val noNodes: Int = { - var start = noHidden*(noInput + 1) + k*(noHidden + 1) - var sum_l: Double = 0 - for( w <- 0 to noHidden ) - sum_l = sum_l + arrHidden( w ) * arrWeights( w + start ) - val dg_sum_l = dg( sum_l ) + var tmp: Integer = 0 + for( l <-0 until topology.size ) { + tmp = tmp + topology( l ) + } - for( j <- 0 to noHidden ) { + tmp - gradient( noHidden*(noInput + 1) + k*(noHidden + 1) + j ) - = 2*(diff(k))*dg_sum_l*arrHidden(j) + } - /* - * The following is for verification only - if( rand.nextInt % (testOneWOutOf>>1) == 0 ) { - arrWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) - = arrWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) + eps - val est2 = computeValues( arrData, arrWeights_tmp )._2 - var E2: Double = 0.0; - for( w <- 0 to noOutput-1 ) { - val diff2 = est2(w)-data( noInput+w ) - E2 = E2 + diff2*diff2 - } - val d = ( E2 - E ) / eps - val compErr = math.abs( gradient( noHidden*(noInput+1)+k*(noHidden+1)+j) - d ) - if( compErr > warnErr ) { - println( "!!! 
Calc/Est Wjk: " + - ( ( gradient( noHidden*(noInput+1)+k*(noHidden+1)+j), d ), compErr ) ) - } - arrWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) - = arrWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) - eps - } - */ + val ofsNode: Array[Int] = { + var tmp = new Array[Int]( L + 1 ) + tmp( 0 ) = 0 + + for( l <-1 to L ) { + tmp( l ) = tmp( l - 1 ) + topology( l - 1 ) + } + + tmp + + } + + /* For verification only + def calcErr( arrData: Array[Double], arrWeights: Array[Double] ): Double = { + + var arrPrev = new Array[Double]( topology( 0 ) ) + + for( i <- 0 until topology( 0 ) ) + arrPrev( i ) = arrData( i ) + + for( l <- 1 to L ) { + val arrCur = new Array[Double]( topology( l ) ) + for( j <- 0 until topology( l ) ) { + var cum: Double = 0.0 + for( i <-0 until topology( l-1 ) ) { + cum = cum + + arrPrev( i ) * arrWeights( ofsWeight( l ) + ( topology( l-1 ) + 1 ) * j + i ) + } + cum = cum + + arrWeights( ofsWeight( l ) + ( topology( l-1 ) + 1 )*j + topology( l-1 ) ) // bias + arrCur( j ) = g( cum ) } + arrPrev = arrCur; + } + + val arrDiff = new Array[Double]( topology( L ) ) + for( j <- 0 until topology( L ) ) { + arrDiff( j ) = ( arrPrev( j ) - arrData( topology(0) + j ) ) + } + + var err: Double = 0; + for( j <-0 until topology( L ) ) { + err = err + arrDiff( j )*arrDiff( j ) + } + + err*.5 + } + */ + + override def compute( data: Vector, label: Double, weights: Vector ): ( Vector, Double ) = { + + val arrData = data.toArray + val arrWeights = weights.toArray + val arrNodes = new Array[Double]( noNodes ) + + /* + * nodes + */ + for( i <- 0 until topology( 0 ) ) { + arrNodes( i ) = arrData( i ) } - var start = noHidden * (noInput + 1) - var sum_n1: Double = 0 - for( w <- 0 to noHidden ) - sum_n1 = sum_n1 + arrHidden( w )*arrWeights( w + start ) - val dg_sum_n1 = dg( sum_n1 ) + for( l <- 1 to L ) { + for( j <- 0 until topology( l ) ) { + var cum: Double = 0.0; + for( i <- 0 until topology( l-1 ) ) { + cum = cum + + arrWeights( ofsWeight( l ) + ( topology( l-1 ) + 1 ) * j + i ) * + arrNodes( ofsNode( l-1 ) + i ) + } + cum = cum + arrWeights( ofsWeight( l ) + ( topology( l-1 ) + 1 )*j + topology( l-1 ) ) + arrNodes( ofsNode( l ) + j ) = g( cum ) + } + } + val arrDiff = new Array[Double]( topology( L ) ) + for( j <- 0 until topology( L ) ) { + arrDiff( j ) = ( arrNodes( ofsNode( L ) + j ) - arrData( topology(0) + j ) ) + } - /* Vij */ - for( j <- 0 to noHidden - 1 ) { /* the hidden robonode has no associated Vij */ + var err: Double = 0; + for( j <-0 until topology( L ) ) { + err = err + arrDiff( j )*arrDiff( j ) + } + err = err*.5 - start = j * ( noInput + 1 ) - var sum_n2: Double = 0 - for( w <- 0 to noInput-1 ) // non-robonodes - sum_n2 = sum_n2 + arrData( w )*arrWeights( w + start) - sum_n2 = sum_n2 - arrWeights( noInput + start) // robonode - val dg_sum_n2 = dg( sum_n2 ) + /* + * back propagation + */ - for( i <- 0 to noInput ) { + val arrDelta = new Array[Double]( noNodes ) + for( j <- 0 until topology( L ) ) { + arrDelta( ofsNode( L ) + j ) = + arrDiff( j ) * + arrNodes( ofsNode( L ) + j ) * ( 1 - arrNodes( ofsNode( L ) + j ) ) + } - for( k<- 0 to noOutput - 1 ) { + for( l <- L-1 until 0 by -1 ) { + for( j <- 0 until topology( l ) ) { + var cum: Double = 0.0 + for( i <- 0 until topology( l + 1 ) ) { + cum = cum + + arrWeights( ofsWeight( l + 1 ) + ( topology( l ) + 1 ) * i + j ) * + arrDelta( ofsNode( l + 1 ) + i ) * + arrNodes( ofsNode( l ) + j ) * ( 1 - arrNodes( ofsNode( l ) + j ) ) + } + arrDelta( ofsNode( l ) + j ) = cum + } + } - if( i + println( (dE, arrGrad( 
ofsWeight( l ) + + ( topology( l - 1 ) + 1 ) * j + i ), errGrad ) ) } - + */ } - /* - * The following is for verification only - if( rand.nextInt % (testOneVOutOf>>1) == 0 ) { - arrWeights_tmp( i+j*(noInput+1) ) = arrWeights_tmp( i+j*(noInput+1) ) + eps - val est2 = computeValues( arrData, arrWeights_tmp )._2 + arrGrad( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ) = + arrDelta( ofsNode( l ) + j ) - var E2: Double = 0.0; - for( w <- 0 to noOutput-1 ) { - val diff2 = est2(w)-data( noInput+w ) - E2 = E2 + diff2*diff2 - } + /* for verification only + val tmpErr0 = calcErr( arrData, arrWcopy ) + arrWcopy( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ) = + arrWcopy( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ) + eps + val tmpErr1 = calcErr( arrData, arrWcopy ) + arrWcopy( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ) = + arrWcopy( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ) - eps + val dE = ( tmpErr1 - tmpErr0 ) / eps - val d = ( E2 - E ) / eps - val compErr = math.abs( gradient( i+j*(noInput+1) )-d ) - if( compErr>warnErr ) - println( "!!! Calc/Est Vij: "+ ( ( gradient( i+j*(noInput+1) ), d ), compErr ) ) - arrWeights_tmp( i+j*(noInput+1) ) = arrWeights_tmp( i+j*(noInput+1) ) - eps + val errGrad = math.abs( dE - + arrGrad( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ) ) + + try { + assert( errGrad < errGradAccept ) + } + catch { + case e: AssertionError => + println( (dE, + arrGrad( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ), + errGrad ) ) } */ + } } - (Vectors.dense(gradient), E) + ( Vectors.dense( arrGrad ), err ) } @@ -389,9 +463,7 @@ class LeastSquaresGradientANN( cumGradient: Vector): Double = { val (grad, err) = compute( data, label, weights ) - cumGradient.toBreeze += grad.toBreeze - return err } @@ -407,12 +479,10 @@ class ANNUpdater extends Updater { regParam: Double): (Vector, Double) = { val thisIterStepSize = stepSize - val brzWeights: BV[Double] = weightsOld.toBreeze.toDenseVector - brzAxpy(-thisIterStepSize, gradient.toBreeze, brzWeights) - (Vectors.fromBreeze(brzWeights), 0) + } } From aed39c66255b4a368d587f2b0aba9c966fadfdd0 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Tue, 2 Sep 2014 09:50:00 +0800 Subject: [PATCH 027/143] Update TestParallelANN.scala Updated to new interface using the "topology" vector. 
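After this change a network is specified purely by its 'topology' array; the convenience constructor for a single hidden layer remains. A hedged sketch based on the constructors in the patch above:

```scala
// Two equivalent ways to build a 2-20-1 network after the topology refactoring.
val byTopology = new ParallelANN(Array[Int](2, 20, 1))   // explicit layer sizes
val byCounts = new ParallelANN(2, 20, 1)                 // noInput, noHidden, noOutput
byTopology.optimizer.setNumIterations(1000).setStepSize(1.0)
```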
--- .../spark/mllib/ann/TestParallelANN.scala | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala index b37c8a493dd2b..a097df0c5f520 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala @@ -127,9 +127,9 @@ object TestParallelANN { def main( arg: Array[String] ) { println( "Parallel ANN tester" ) + println - val formatter = new SimpleDateFormat("hh:mm:ss") - val starttime = Calendar.getInstance().getTime() + val formatter = new SimpleDateFormat("hh:mm:ss") var curAngle: Double = 0.0 var graphic: Boolean = false @@ -158,12 +158,16 @@ object TestParallelANN { var A = 20.0 var B = 50.0 - var conf = new SparkConf().setAppName("Parallel ANN").setMaster("local[5]") + var conf = new SparkConf().setAppName("Parallel ANN").setMaster("local[1]") var sc = new SparkContext(conf) - val testRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache() + val testRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache val testRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 200 ), 2).cache val testRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache + + val validationRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache + val validationRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache + val validationRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache if( graphic ) { @@ -173,26 +177,25 @@ object TestParallelANN { } - val parallelANN2D = new ParallelANN( 1, 10 ) + val parallelANN2D = new ParallelANN( Array[Int]( 1, 3, 3, 1 ) ) parallelANN2D.optimizer.setNumIterations(1000).setStepSize( 1.0 ) - val parallelANN3D = new ParallelANN( 2, 20 ) + val parallelANN3D = new ParallelANN( Array[Int]( 2, 20, 1 ) ) parallelANN3D.optimizer.setNumIterations(1000).setStepSize( 1.0 ) - val parallelANN4D = new ParallelANN( 1, 20, 3 ) + val parallelANN4D = new ParallelANN( Array[Int]( 1, 20, 3 ) ) parallelANN4D.optimizer.setNumIterations( 1000 ).setStepSize( 1.0 ) + + val starttime = Calendar.getInstance().getTime() + println( "Start training " + starttime ) var model2D = parallelANN2D.train( testRDD2D ) var model3D = parallelANN3D.train( testRDD3D ) var model4D = parallelANN4D.train( testRDD4D ) - val noIt = 20 + val noIt = 1500 var errHist = new Array[(Int,Double,Double,Double)]( noIt ) - val validationRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache - val validationRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache - val validationRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache - for( i <- 0 to noIt - 1 ) { val predictedAndTarget2D = validationRDD2D.map( T => ( T._1, T._2, model2D.predictV( T._1 ) ) ) @@ -247,10 +250,7 @@ object TestParallelANN { } - val now = Calendar.getInstance().getTime() - val times = formatter.format( now ); - - println( "It. "+i+" ("+times+"), Error 2D/3D/4D: " + (err2D, err3D, err4D) ) + println( "It. 
"+i+" ("+Calendar.getInstance().getTime()+"), Error 2D/3D/4D: " + (err2D, err3D, err4D) ) errHist(i) = ( i, err2D, err3D, err4D ) if( i < noIt - 1 ) { From 1972c695b3963f42615c02422717b0da26f4af1d Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Fri, 5 Sep 2014 18:11:21 +0400 Subject: [PATCH 028/143] ANN test suite: learning XOR function --- .../org/apache/spark/mllib/ann/ANNSuite.scala | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala new file mode 100644 index 0000000000000..ff6eb51377d78 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala @@ -0,0 +1,29 @@ +package org.apache.spark.mllib.ann + +import org.apache.spark.mllib.linalg.Vectors +import org.apache.spark.mllib.util.LocalSparkContext +import org.scalatest.FunSuite + +class ANNSuite extends FunSuite with LocalSparkContext { + private val inputs = Array[Array[Double]]( + Array[Double](0,0), + Array[Double](0,1), + Array[Double](1,0), + Array[Double](1,1) + ) + private val outputs = Array[Double](0, 1, 1, 0) + private val inputSize = 2 + private val hiddenSize = 5 + private val outputSize = 1 + test("ANN learns XOR function") { + val data = inputs.zip(outputs).map { case(features, label) => + (Vectors.dense(features), Vectors.dense(Array(label)))} + val rddData = sc.parallelize(data, 2) + val ann = new ParallelANN(Array[Int](inputSize, hiddenSize, outputSize)) + ann.optimizer.setNumIterations(2000).setStepSize(2.0) + val model = ann.train(rddData) + val predictionAndLabels = rddData.map { case(input, label) => + (model.predictV(input)(0), label(0)) }.collect() + assert(predictionAndLabels.forall { case(p, l) => (math.round(p) - l) == 0 }) + } +} From d04c1d63c596136fae3c0885ede20fd86413c63c Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Tue, 9 Sep 2014 12:34:53 +0400 Subject: [PATCH 029/143] Removing dependency on GeneralizedModel and Algorithm --- .../apache/spark/mllib/ann/ParallelANN.scala | 79 +++++++++++++------ 1 file changed, 56 insertions(+), 23 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala index 789854da4d32a..a3a832844fb3e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala @@ -28,7 +28,7 @@ import breeze.linalg.{axpy => brzAxpy, Vector => BV} import breeze.linalg.{Vector => BV} import breeze.linalg.{axpy => brzAxpy} import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.mllib.regression.RegressionModel +import org.apache.spark.mllib.regression.{LabeledPoint, RegressionModel} import org.apache.spark.util.random.XORShiftRandom /* @@ -77,29 +77,24 @@ import org.apache.spark.util.random.XORShiftRandom */ class ParallelANNModel private[mllib] ( - override val weights: Vector, + val weights: Vector, val topology: Array[Int] ) - extends GeneralizedModel(weights) with RegressionModel with Serializable { + extends Serializable { - val L = topology.length-1 - - val ofsWeight: Array[Int] = { - - var tmp = new Array[Int]( L + 1 ) + private val L = topology.length - 1 + private val ofsWeight: Array[Int] = { + val tmp = new Array[Int](L + 1) var curPos = 0; - tmp( 0 ) = 0; for( l <- 1 to L ) { tmp( l ) = curPos curPos = curPos + ( topology( 
l - 1 ) + 1 ) * ( topology( l ) ) } - tmp - } - def g( x: Double ) = 1.0 / (1.0 + math.exp( -x ) ) + private def g( x: Double ) = 1.0 / (1.0 + math.exp( -x ) ) def computeValues( arrData: Array[Double], arrWeights: Array[Double] ): Array[Double] = { @@ -126,7 +121,7 @@ class ParallelANNModel private[mllib] ( } - override def predictPoint( data: Vector, weights: Vector ): Double = { + def predictPoint( data: Vector, weights: Vector ): Double = { val outp = computeValues( data.toArray, weights.toArray ) outp(0) } @@ -135,6 +130,21 @@ class ParallelANNModel private[mllib] ( Vectors.dense( computeValues( data.toArray, weights.toArray ) ) } + /** + * Predict values for a single data point using the model trained. + * + * @param testData array representing a single data point + * @return Vector prediction from the trained model + * + * Returns the complete vector. + */ + def predictV( testData: Vector ): Vector = { + + predictPointV( testData, weights ) + + } + + } class ParallelANN( @@ -142,13 +152,13 @@ class ParallelANN( private var numIterations: Int, private var stepSize: Double, private var miniBatchFraction: Double ) - extends GeneralizedAlgorithm[ParallelANNModel] with Serializable { + extends Serializable { private val rand = new XORShiftRandom private val gradient = new LeastSquaresGradientANN( topology ) private val updater = new ANNUpdater() - override val optimizer = new GradientDescent(gradient, updater) + val optimizer = new GradientDescent(gradient, updater) .setStepSize(stepSize) .setNumIterations(numIterations) .setMiniBatchFraction(miniBatchFraction) @@ -173,7 +183,7 @@ class ParallelANN( this( Array( noInput, noHidden, noOutput ) ) } - override protected def createModel( weights: Vector ) = { + protected def createModel( weights: Vector ) = { new ParallelANNModel( weights, topology ) } @@ -217,26 +227,49 @@ class ParallelANN( } + private def run(input: RDD[(Vector,Vector)], initialWeights: Vector): ParallelANNModel = { + + val data = input.map( v => ( + (0.0).toDouble, + Vectors.fromBreeze( DenseVector.vertcat( + v._1.toBreeze.toDenseVector, + v._2.toBreeze.toDenseVector ) ) + ) ) + val weights = optimizer.optimize(data, initialWeights) + createModel( weights ) + } + +} + +object ParallelANN { + + def train( + input: RDD[(Vector,Vector)], + numIterations: Int, + stepSize: Double, + regParam: Double, + miniBatchFraction: Double, + initialWeights: Vector): ParallelANNModel = { + null + } + } + class LeastSquaresGradientANN( topology: Array[Int] ) extends Gradient { - def g( x: Double ) = 1.0 / (1.0 + math.exp( -x ) ) - - val L = topology.length-1 + private def g( x: Double ) = 1.0 / (1.0 + math.exp( -x ) ) - val noWeights = { + private val L = topology.length - 1 + private val noWeights = { var tmp = 0 - for( i<-1 to L ) { tmp = tmp + topology(i) * ( topology( i - 1 ) + 1 ) } - tmp - } val ofsWeight: Array[Int] = { From bd4508bf251258e4f463e30b4e7fa7d8789977d7 Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Tue, 9 Sep 2014 17:58:47 +0400 Subject: [PATCH 030/143] Addressing reviewers comments: interface refactoring --- .../mllib/ann/ArtificialNeuralNetwork.scala | 351 ++++++++++++ .../spark/mllib/ann/GeneralizedModel.scala | 158 ------ .../apache/spark/mllib/ann/ParallelANN.scala | 521 ------------------ .../org/apache/spark/mllib/ann/ANNSuite.scala | 5 +- .../spark/mllib/ann/TestParallelANN.scala | 205 ++++--- 5 files changed, 452 insertions(+), 788 deletions(-) create mode 100644 mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala delete mode 
100644 mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedModel.scala delete mode 100644 mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala new file mode 100644 index 0000000000000..6ecbb18202817 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -0,0 +1,351 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.ann + +import breeze.linalg.{DenseVector, Vector => BV, axpy => brzAxpy} +import org.apache.spark.mllib.linalg.{Vector, Vectors} +import org.apache.spark.mllib.optimization._ +import org.apache.spark.rdd.RDD +import org.apache.spark.util.random.XORShiftRandom + +/* + * Implements a Artificial Neural Network (ANN) + * + * The data consists of an input vector and an output vector, combined into a single vector + * as follows: + * + * [ ---input--- ---output--- ] + * + * NOTE: output values should be in the range [0,1] + * + * For a network of L layers: + * + * topology( l ) indicates the number of nodes in layer l, excluding the bias node. + * + * noInput = topology(0), the number of input nodes + * noOutput = topology(L-1), the number of output nodes + * + * input = data( 0 to noInput-1 ) + * output = data( noInput to noInput+noOutput-1 ) + * + * W_ijl is the weight from node i in layer l-1 to node j in layer l + * W_ijl goes to position ofsWeight(l) + j*(topology(l-1)+1) + i in the weights vector + * + * B_jl is the bias input of node j in layer l + * B_jl goes to position ofsWeight(l) + j*(topology(l-1)+1) + topology(l-1) in the weights vector + * + * error function: E( O, Y ) = sum( O_j - Y_j ) + * (with O = (O_0, ..., O_(noOutput-1)) the output of the ANN, + * and (Y_0, ..., Y_(noOutput-1)) the input) + * + * node_jl is node j in layer l + * node_jl goes to position ofsNode(l) + j + * + * The weights gradient is defined as dE/dW_ijl and dE/dB_jl + * It has same mapping as W_ijl and B_jl + * + * For back propagation: + * delta_jl = dE/dS_jl, where S_jl the output of node_jl, but before applying the sigmoid + * delta_jl has the same mapping as node_jl + * + * Where E = ((estOutput-output),(estOutput-output)), + * the inner product of the difference between estimation and target output with itself. 
+ * + */ + +class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topology: Array[Int]) + extends Serializable { + + private val L = topology.length - 1 + + private val ofsWeight: Array[Int] = { + val tmp = new Array[Int](L + 1) + var curPos = 0 + tmp(0) = 0 + for (l <- 1 to L) { + tmp(l) = curPos + curPos = curPos + (topology(l - 1) + 1) * (topology(l)) + } + tmp + } + + private def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) + + def computeValues(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { + var arrPrev = new Array[Double](topology(0)) + for (i <- 0 until topology(0)) + arrPrev(i) = arrData(i) + for (l <- 1 to L) { + val arrCur = new Array[Double](topology(l)) + for (j <- 0 until topology(l)) { + var cum: Double = 0.0 + for (i <- 0 until topology(l - 1)) + cum = cum + + arrPrev(i) * arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + i) + cum = cum + + arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + topology(l - 1)) // bias + arrCur(j) = g(cum) + } + arrPrev = arrCur + } + arrPrev + } + + def predictPoint(data: Vector, weights: Vector): Double = { + val outp = computeValues(data.toArray, weights.toArray) + outp(0) + } + + def predictPointV(data: Vector, weights: Vector): Vector = { + Vectors.dense(computeValues(data.toArray, weights.toArray)) + } + + /** + * Predict values for a single data point using the model trained. + * + * @param testData array representing a single data point + * @return Vector prediction from the trained model + * + * Returns the complete vector. + */ + def predictV(testData: Vector): Vector = { + predictPointV(testData, weights) + } +} + +class ArtificialNeuralNetwork private( + private var topology: Array[Int], + private var numIterations: Int, + private var stepSize: Double, + private var miniBatchFraction: Double) + extends Serializable { + + private val gradient = new ANNLeastSquaresGradient(topology) + private val updater = new ANNUpdater() + private val optimizer = new GradientDescent(gradient, updater) + .setStepSize(stepSize) + .setNumIterations(numIterations) + .setMiniBatchFraction(miniBatchFraction) + + private def run(input: RDD[(Vector, Vector)], initialWeights: Vector): + ArtificialNeuralNetworkModel = { + val data = input.map(v => + (0.0, + Vectors.fromBreeze(DenseVector.vertcat( + v._1.toBreeze.toDenseVector, + v._2.toBreeze.toDenseVector)) + )) + val weights = optimizer.optimize(data, initialWeights) + new ArtificialNeuralNetworkModel(weights, topology) + } +} + +object ArtificialNeuralNetwork { + + def train( + input: RDD[(Vector, Vector)], + topology: Array[Int], + initialWeights: Vector, + numIterations: Int, + stepSize: Double, + miniBatchFraction: Double): ArtificialNeuralNetworkModel = { + new ArtificialNeuralNetwork(topology, numIterations, stepSize, miniBatchFraction) + .run(input, initialWeights) + } + + def train( + input: RDD[(Vector, Vector)], + topology: Array[Int], + initialWeights: Vector, + numIterations: Int, + stepSize: Double): ArtificialNeuralNetworkModel = { + new ArtificialNeuralNetwork(topology, numIterations, stepSize, 1.0).run(input, initialWeights) + } + + def train( + input: RDD[(Vector, Vector)], + topology: Array[Int], + numIterations: Int, + stepSize: Double, + miniBatchFraction: Double): ArtificialNeuralNetworkModel = { + new ArtificialNeuralNetwork(topology, numIterations, stepSize, miniBatchFraction) + .run(input, randomWeights(topology)) + } + + def train( + input: RDD[(Vector, Vector)], + topology: Array[Int], + numIterations: Int, + stepSize: Double): 
ArtificialNeuralNetworkModel = { + train(input, topology, numIterations, stepSize, 1.0) + } + + def randomWeights(topology: Array[Int]): Vector = { + val rand = new XORShiftRandom() + val noWeights = { + var tmp = 0 + for (i <- 1 until topology.size) { + tmp = tmp + topology(i) * (topology(i - 1) + 1) + } + tmp + } + + val initialWeightsArr = new Array[Double](noWeights) + var pos = 0; + for (l <- 1 until topology.length) { + for (i <- 0 until (topology(l) * (topology(l - 1) + 1))) { + initialWeightsArr(pos) = (rand.nextDouble * 4.8 - 2.4) / (topology(l - 1) + 1) + pos += 1; + } + } + Vectors.dense(initialWeightsArr) + } + +} + +private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { + + private def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) + + private val L = topology.length - 1 + + private val noWeights = { + var tmp = 0 + for (i <- 1 to L) { + tmp = tmp + topology(i) * (topology(i - 1) + 1) + } + tmp + } + + val ofsWeight: Array[Int] = { + val tmp = new Array[Int](L + 1) + var curPos = 0; + tmp(0) = 0; + for (l <- 1 to L) { + tmp(l) = curPos + curPos = curPos + (topology(l - 1) + 1) * (topology(l)) + } + tmp + } + + val noNodes: Int = { + var tmp: Integer = 0 + for (l <- 0 until topology.size) { + tmp = tmp + topology(l) + } + tmp + } + + val ofsNode: Array[Int] = { + val tmp = new Array[Int](L + 1) + tmp(0) = 0 + for (l <- 1 to L) { + tmp(l) = tmp(l - 1) + topology(l - 1) + } + tmp + } + + override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { + val arrData = data.toArray + val arrWeights = weights.toArray + val arrNodes = new Array[Double](noNodes) + // forward run + for (i <- 0 until topology(0)) { + arrNodes(i) = arrData(i) + } + for (l <- 1 to L) { + for (j <- 0 until topology(l)) { + var cum: Double = 0.0; + for (i <- 0 until topology(l - 1)) { + cum = cum + + arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + i) * + arrNodes(ofsNode(l - 1) + i) + } + cum = cum + arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + topology(l - 1)) + arrNodes(ofsNode(l) + j) = g(cum) + } + } + val arrDiff = new Array[Double](topology(L)) + for (j <- 0 until topology(L)) { + arrDiff(j) = (arrNodes(ofsNode(L) + j) - arrData(topology(0) + j)) + } + var err: Double = 0; + for (j <- 0 until topology(L)) { + err = err + arrDiff(j) * arrDiff(j) + } + err = err * .5 + // back propagation + val arrDelta = new Array[Double](noNodes) + for (j <- 0 until topology(L)) { + arrDelta(ofsNode(L) + j) = + arrDiff(j) * + arrNodes(ofsNode(L) + j) * (1 - arrNodes(ofsNode(L) + j)) + } + for (l <- L - 1 until 0 by -1) { + for (j <- 0 until topology(l)) { + var cum: Double = 0.0 + for (i <- 0 until topology(l + 1)) { + cum = cum + + arrWeights(ofsWeight(l + 1) + (topology(l) + 1) * i + j) * + arrDelta(ofsNode(l + 1) + i) * + arrNodes(ofsNode(l) + j) * (1 - arrNodes(ofsNode(l) + j)) + } + arrDelta(ofsNode(l) + j) = cum + } + } + // gradient + val arrGrad = new Array[Double](noWeights) + for (l <- 1 to L) { + for (j <- 0 until topology(l)) { + for (i <- 0 until topology(l - 1)) { + arrGrad(ofsWeight(l) + (topology(l - 1) + 1) * j + i) = + arrNodes(ofsNode(l - 1) + i) * + arrDelta(ofsNode(l) + j) + } + arrGrad(ofsWeight(l) + (topology(l - 1) + 1) * j + topology(l - 1)) = + arrDelta(ofsNode(l) + j) + } + } + (Vectors.dense(arrGrad), err) + } + + override def compute( + data: Vector, + label: Double, + weights: Vector, + cumGradient: Vector): Double = { + val (grad, err) = compute(data, label, weights) + cumGradient.toBreeze += grad.toBreeze + return err + } +} + 
+private class ANNUpdater extends Updater { + + override def compute( + weightsOld: Vector, + gradient: Vector, + stepSize: Double, + iter: Int, + regParam: Double): (Vector, Double) = { + val thisIterStepSize = stepSize + val brzWeights: BV[Double] = weightsOld.toBreeze.toDenseVector + brzAxpy(-thisIterStepSize, gradient.toBreeze, brzWeights) + (Vectors.fromBreeze(brzWeights), 0) + } +} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedModel.scala deleted file mode 100644 index ba3e31ae9d6da..0000000000000 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedModel.scala +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.mllib.ann - -import breeze.linalg.{DenseVector => BDV, SparseVector => BSV} -import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.Logging -import org.apache.spark.rdd.RDD -import org.apache.spark.mllib.optimization._ -import org.apache.spark.mllib.linalg.{Vectors, Vector} -import breeze.linalg.DenseVector -import breeze.linalg.{DenseVector => BDV} -import breeze.linalg.{SparseVector => BSV} - -/** - * :: DeveloperApi :: - * GeneralizedModel represents a model trained using - * GeneralizedAlgorithm. - * - * @param weights Weights computed for every feature. - */ -@DeveloperApi -abstract class GeneralizedModel(val weights: Vector ) - - extends Serializable { - - /** - * Predict the result given a data point and the weights learned. - * - * @param dataMatrix Row vector containing the features for this data point - * @param weightMatrix Column vector containing the weights of the model - * - * If the prediction model consists of a multi-dimensional vector, predictPoint - * returns only the first element of each vector. To get the whole vector, - * use predictPointV instead. - */ - protected def predictPoint( dataMatrix: Vector, weightMatrix: Vector ): Double - - /** - * Predict the result given a data point and the weights learned. - * - * @param dataMatrix Row vector containing the features for this data point - * @param weightMatrix Column vector containing the weights of the model - * - * Returns the complete output vector. - */ - protected def predictPointV( dataMatrix: Vector, weightsMatrix: Vector ): Vector - - /** - * Predict values for the given data set using the model trained. - * - * @param testData RDD representing data points to be predicted - * @return RDD[Double] where each entry contains the corresponding prediction - * - * Returns only first element of output vector. 
- */ - def predict( testData: RDD[Vector] ): RDD[Double] = { - - val localWeights = weights - testData.map(v => predictPoint(v, localWeights ) ) - - } - - /** - * Predict values for the given data set using the model trained. - * - * @param testData RDD representing data points to be predicted - * @return RDD[Vector] where each entry contains the corresponding prediction - * - * Returns the complete output vector. - */ - def predictV( testData: RDD[Vector] ): RDD[Vector] = { - - val localWeights = weights - testData.map( v => predictPointV( v, localWeights ) ) - - } - - /** - * Predict values for a single data point using the model trained. - * - * @param testData array representing a single data point - * @return Double prediction from the trained model - * - * Returns only first element of output vector. - */ - def predict( testData: Vector ): Double = { - - predictPoint( testData, weights ) - - } - - /** - * Predict values for a single data point using the model trained. - * - * @param testData array representing a single data point - * @return Vector prediction from the trained model - * - * Returns the complete vector. - */ - def predictV( testData: Vector ): Vector = { - - predictPointV( testData, weights ) - - } - -} - -/** - * :: DeveloperApi :: - * GeneralizedAlgorithm implements methods to train a function. - * This class should be extended with an Optimizer to create a new GM. - */ -@DeveloperApi -abstract class GeneralizedAlgorithm[M <: GeneralizedModel] - extends Logging with Serializable { - - /** The optimizer to solve the problem. */ - def optimizer: Optimizer - - /** - * Create a model given the weights - */ - protected def createModel(weights: Vector): M - - /** - * Run the algorithm with the configured parameters on an input RDD - * of (Vector,Vector) entries starting from the initial weights provided. - */ - def run(input: RDD[(Vector,Vector)], initialWeights: Vector): M = { - - val data = input.map( v => ( - (0.0).toDouble, - Vectors.fromBreeze( DenseVector.vertcat( - v._1.toBreeze.toDenseVector, - v._2.toBreeze.toDenseVector ) ) - ) ) - val weights = optimizer.optimize(data, initialWeights) - - createModel( weights ) - - } -} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala deleted file mode 100644 index a3a832844fb3e..0000000000000 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala +++ /dev/null @@ -1,521 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.mllib.ann - -import org.apache.spark.rdd.RDD -import org.apache.spark.mllib.linalg.Vector -import org.apache.spark.mllib.optimization._ -import org.apache.spark.mllib.linalg.Vectors -import breeze.linalg.DenseVector -import org.apache.spark.mllib.linalg.Vectors -import org.apache.spark.rdd.RDD -import breeze.linalg.{axpy => brzAxpy, Vector => BV} -import breeze.linalg.{Vector => BV} -import breeze.linalg.{axpy => brzAxpy} -import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.mllib.regression.{LabeledPoint, RegressionModel} -import org.apache.spark.util.random.XORShiftRandom - -/* - * Implements a Artificial Neural Network (ANN) - * - * The data consists of an input vector and an output vector, combined into a single vector - * as follows: - * - * [ ---input--- ---output--- ] - * - * NOTE: output values should be in the range [0,1] - * - * For a network of L layers: - * - * topology( l ) indicates the number of nodes in layer l, excluding the bias node. - * - * noInput = topology(0), the number of input nodes - * noOutput = topology(L-1), the number of output nodes - * - * input = data( 0 to noInput-1 ) - * output = data( noInput to noInput+noOutput-1 ) - * - * W_ijl is the weight from node i in layer l-1 to node j in layer l - * W_ijl goes to position ofsWeight(l) + j*(topology(l-1)+1) + i in the weights vector - * - * B_jl is the bias input of node j in layer l - * B_jl goes to position ofsWeight(l) + j*(topology(l-1)+1) + topology(l-1) in the weights vector - * - * error function: E( O, Y ) = sum( O_j - Y_j ) - * (with O = (O_0, ..., O_(noOutput-1)) the output of the ANN, - * and (Y_0, ..., Y_(noOutput-1)) the input) - * - * node_jl is node j in layer l - * node_jl goes to position ofsNode(l) + j - * - * The weights gradient is defined as dE/dW_ijl and dE/dB_jl - * It has same mapping as W_ijl and B_jl - * - * For back propagation: - * delta_jl = dE/dS_jl, where S_jl the output of node_jl, but before applying the sigmoid - * delta_jl has the same mapping as node_jl - * - * Where E = ((estOutput-output),(estOutput-output)), - * the inner product of the difference between estimation and target output with itself. 
- * - */ - -class ParallelANNModel private[mllib] ( - val weights: Vector, - val topology: Array[Int] ) - extends Serializable { - - private val L = topology.length - 1 - - private val ofsWeight: Array[Int] = { - val tmp = new Array[Int](L + 1) - var curPos = 0; - tmp( 0 ) = 0; - for( l <- 1 to L ) { - tmp( l ) = curPos - curPos = curPos + ( topology( l - 1 ) + 1 ) * ( topology( l ) ) - } - tmp - } - - private def g( x: Double ) = 1.0 / (1.0 + math.exp( -x ) ) - - def computeValues( arrData: Array[Double], arrWeights: Array[Double] ): Array[Double] = { - - var arrPrev = new Array[Double]( topology( 0 ) ) - - for( i <- 0 until topology( 0 ) ) - arrPrev( i ) = arrData( i ) - - for( l <- 1 to L ) { - val arrCur = new Array[Double]( topology( l ) ) - for( j <- 0 until topology( l ) ) { - var cum: Double = 0.0 - for( i <-0 until topology( l-1 ) ) - cum = cum + - arrPrev( i ) * arrWeights( ofsWeight( l ) + ( topology( l-1 ) + 1 ) * j + i ) - cum = cum + - arrWeights( ofsWeight( l ) + ( topology( l-1 ) + 1 )*j + topology( l-1 ) ) // bias - arrCur( j ) = g( cum ) - } - arrPrev = arrCur; - } - - arrPrev - - } - - def predictPoint( data: Vector, weights: Vector ): Double = { - val outp = computeValues( data.toArray, weights.toArray ) - outp(0) - } - - def predictPointV( data: Vector, weights: Vector): Vector = { - Vectors.dense( computeValues( data.toArray, weights.toArray ) ) - } - - /** - * Predict values for a single data point using the model trained. - * - * @param testData array representing a single data point - * @return Vector prediction from the trained model - * - * Returns the complete vector. - */ - def predictV( testData: Vector ): Vector = { - - predictPointV( testData, weights ) - - } - - -} - -class ParallelANN( - private var topology: Array[Int], - private var numIterations: Int, - private var stepSize: Double, - private var miniBatchFraction: Double ) - extends Serializable { - - private val rand = new XORShiftRandom - - private val gradient = new LeastSquaresGradientANN( topology ) - private val updater = new ANNUpdater() - val optimizer = new GradientDescent(gradient, updater) - .setStepSize(stepSize) - .setNumIterations(numIterations) - .setMiniBatchFraction(miniBatchFraction) - - val noWeights = { - - var tmp = 0 - - for( i<-1 until topology.size ) { - tmp = tmp + topology(i) * (topology(i-1) + 1) - } - - tmp - - } - - def this( topology: Array[Int] ) = { - this( topology, 100, 1.0, 1.0 ) - } - - def this( noInput: Int, noHidden: Int, noOutput: Int ) = { - this( Array( noInput, noHidden, noOutput ) ) - } - - protected def createModel( weights: Vector ) = { - new ParallelANNModel( weights, topology ) - } - - def train( rdd: RDD[(Vector,Vector)] ): ParallelANNModel = { - - val ft = rdd.first() - - assert( topology( 0 ) == ft._1.size ) - assert( topology( topology.length-1 ) == ft._2.size ) - - val initialWeightsArr = new Array[Double](noWeights) - - var pos = 0; - - for( l <- 1 until topology.length ) { - for( i <- 0 until ( topology( l ) * ( topology( l - 1 ) + 1 ) ) ) { - initialWeightsArr( pos ) = ( rand.nextDouble * 4.8 - 2.4 ) / ( topology( l - 1 ) + 1) - pos = pos + 1; - } - } - - assert( pos == noWeights ) - - val initialWeights = Vectors.dense( initialWeightsArr ) - - run( rdd, initialWeights ) - - } - - def train( rdd: RDD[(Vector,Vector)], model: ParallelANNModel ): ParallelANNModel = { - - run( rdd, model.weights ) - - } - - def train( rdd: RDD[(Vector,Vector)], weights: Vector ): ParallelANNModel = { - - val ft = rdd.first() - assert( weights.size == noWeights ) - 
run( rdd, weights ); - - } - - private def run(input: RDD[(Vector,Vector)], initialWeights: Vector): ParallelANNModel = { - - val data = input.map( v => ( - (0.0).toDouble, - Vectors.fromBreeze( DenseVector.vertcat( - v._1.toBreeze.toDenseVector, - v._2.toBreeze.toDenseVector ) ) - ) ) - val weights = optimizer.optimize(data, initialWeights) - createModel( weights ) - } - -} - -object ParallelANN { - - def train( - input: RDD[(Vector,Vector)], - numIterations: Int, - stepSize: Double, - regParam: Double, - miniBatchFraction: Double, - initialWeights: Vector): ParallelANNModel = { - null - } - -} - - -class LeastSquaresGradientANN( - topology: Array[Int] ) - extends Gradient { - - private def g( x: Double ) = 1.0 / (1.0 + math.exp( -x ) ) - - private val L = topology.length - 1 - - private val noWeights = { - var tmp = 0 - for( i<-1 to L ) { - tmp = tmp + topology(i) * ( topology( i - 1 ) + 1 ) - } - tmp - } - - val ofsWeight: Array[Int] = { - - var tmp = new Array[Int]( L + 1 ) - var curPos = 0; - - tmp( 0 ) = 0; - for( l <- 1 to L ) { - tmp( l ) = curPos - curPos = curPos + ( topology( l - 1 ) + 1 ) * ( topology( l ) ) - } - - tmp - - } - - val noNodes: Int = { - - var tmp: Integer = 0 - - for( l <-0 until topology.size ) { - tmp = tmp + topology( l ) - } - - tmp - - } - - val ofsNode: Array[Int] = { - - var tmp = new Array[Int]( L + 1 ) - tmp( 0 ) = 0 - - for( l <-1 to L ) { - tmp( l ) = tmp( l - 1 ) + topology( l - 1 ) - } - - tmp - - } - - /* For verification only - def calcErr( arrData: Array[Double], arrWeights: Array[Double] ): Double = { - - var arrPrev = new Array[Double]( topology( 0 ) ) - - for( i <- 0 until topology( 0 ) ) - arrPrev( i ) = arrData( i ) - - for( l <- 1 to L ) { - val arrCur = new Array[Double]( topology( l ) ) - for( j <- 0 until topology( l ) ) { - var cum: Double = 0.0 - for( i <-0 until topology( l-1 ) ) { - cum = cum + - arrPrev( i ) * arrWeights( ofsWeight( l ) + ( topology( l-1 ) + 1 ) * j + i ) - } - cum = cum + - arrWeights( ofsWeight( l ) + ( topology( l-1 ) + 1 )*j + topology( l-1 ) ) // bias - arrCur( j ) = g( cum ) - } - arrPrev = arrCur; - } - - val arrDiff = new Array[Double]( topology( L ) ) - for( j <- 0 until topology( L ) ) { - arrDiff( j ) = ( arrPrev( j ) - arrData( topology(0) + j ) ) - } - - var err: Double = 0; - for( j <-0 until topology( L ) ) { - err = err + arrDiff( j )*arrDiff( j ) - } - - err*.5 - } - */ - - override def compute( data: Vector, label: Double, weights: Vector ): ( Vector, Double ) = { - - val arrData = data.toArray - val arrWeights = weights.toArray - val arrNodes = new Array[Double]( noNodes ) - - /* - * nodes - */ - - for( i <- 0 until topology( 0 ) ) { - arrNodes( i ) = arrData( i ) - } - - for( l <- 1 to L ) { - for( j <- 0 until topology( l ) ) { - var cum: Double = 0.0; - for( i <- 0 until topology( l-1 ) ) { - cum = cum + - arrWeights( ofsWeight( l ) + ( topology( l-1 ) + 1 ) * j + i ) * - arrNodes( ofsNode( l-1 ) + i ) - } - cum = cum + arrWeights( ofsWeight( l ) + ( topology( l-1 ) + 1 )*j + topology( l-1 ) ) - arrNodes( ofsNode( l ) + j ) = g( cum ) - } - } - - val arrDiff = new Array[Double]( topology( L ) ) - for( j <- 0 until topology( L ) ) { - arrDiff( j ) = ( arrNodes( ofsNode( L ) + j ) - arrData( topology(0) + j ) ) - } - - var err: Double = 0; - for( j <-0 until topology( L ) ) { - err = err + arrDiff( j )*arrDiff( j ) - } - err = err*.5 - - /* - * back propagation - */ - - val arrDelta = new Array[Double]( noNodes ) - - for( j <- 0 until topology( L ) ) { - arrDelta( ofsNode( L ) + j ) = - arrDiff( 
j ) * - arrNodes( ofsNode( L ) + j ) * ( 1 - arrNodes( ofsNode( L ) + j ) ) - } - - for( l <- L-1 until 0 by -1 ) { - for( j <- 0 until topology( l ) ) { - var cum: Double = 0.0 - for( i <- 0 until topology( l + 1 ) ) { - cum = cum + - arrWeights( ofsWeight( l + 1 ) + ( topology( l ) + 1 ) * i + j ) * - arrDelta( ofsNode( l + 1 ) + i ) * - arrNodes( ofsNode( l ) + j ) * ( 1 - arrNodes( ofsNode( l ) + j ) ) - } - arrDelta( ofsNode( l ) + j ) = cum - } - } - - /* - * gradient - */ - - /* for verification only - val arrWcopy = new Array[Double]( noWeights ) - Array.copy(arrWeights, 0, arrWcopy, 0, noWeights ) - val eps = 0.000001 - val errGradAccept = 5e-6 - */ - - val arrGrad = new Array[Double]( noWeights ) - - for( l <- 1 to L ) { - for( j <-0 until topology( l ) ) { - for( i <- 0 until topology( l-1 ) ) { - arrGrad( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + i ) = - arrNodes( ofsNode( l - 1 ) + i ) * - arrDelta( ofsNode( l ) + j ) - - /* for verification only - val tmpErr0 = calcErr( arrData, arrWcopy ) - arrWcopy( ofsWeight( l ) + ( topology( l -1 ) + 1 ) * j + i ) = - arrWcopy( ofsWeight( l ) + ( topology( l -1 ) + 1 ) * j + i ) + eps - val tmpErr1 = calcErr( arrData, arrWcopy ) - arrWcopy( ofsWeight( l ) + ( topology( l -1 ) + 1 ) * j + i ) = - arrWcopy( ofsWeight( l ) + ( topology( l -1 ) + 1 ) * j + i ) - eps - val dE = ( tmpErr1 - tmpErr0 ) / eps - - val errGrad = - math.abs( dE - arrGrad( ofsWeight( l ) + - ( topology( l - 1 ) + 1 ) * j + i ) ) - - try { - assert( errGrad < errGradAccept ) - } - catch { - case e: AssertionError => - println( (dE, arrGrad( ofsWeight( l ) + - ( topology( l - 1 ) + 1 ) * j + i ), errGrad ) ) - } - */ - } - - arrGrad( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ) = - arrDelta( ofsNode( l ) + j ) - - /* for verification only - val tmpErr0 = calcErr( arrData, arrWcopy ) - arrWcopy( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ) = - arrWcopy( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ) + eps - val tmpErr1 = calcErr( arrData, arrWcopy ) - arrWcopy( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ) = - arrWcopy( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ) - eps - val dE = ( tmpErr1 - tmpErr0 ) / eps - - val errGrad = math.abs( dE - - arrGrad( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ) ) - - try { - assert( errGrad < errGradAccept ) - } - catch { - case e: AssertionError => - println( (dE, - arrGrad( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ), - errGrad ) ) - } - */ - - } - } - - ( Vectors.dense( arrGrad ), err ) - - } - - override def compute( - data: Vector, - label: Double, - weights: Vector, - cumGradient: Vector): Double = { - - val (grad, err) = compute( data, label, weights ) - cumGradient.toBreeze += grad.toBreeze - return err - - } -} - -class ANNUpdater extends Updater { - - override def compute( - weightsOld: Vector, - gradient: Vector, - stepSize: Double, - iter: Int, - regParam: Double): (Vector, Double) = { - - val thisIterStepSize = stepSize - val brzWeights: BV[Double] = weightsOld.toBreeze.toDenseVector - brzAxpy(-thisIterStepSize, gradient.toBreeze, brzWeights) - (Vectors.fromBreeze(brzWeights), 0) - - } - -} diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala index ff6eb51377d78..30f51a973712f 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala +++ 
b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala @@ -19,9 +19,8 @@ class ANNSuite extends FunSuite with LocalSparkContext { val data = inputs.zip(outputs).map { case(features, label) => (Vectors.dense(features), Vectors.dense(Array(label)))} val rddData = sc.parallelize(data, 2) - val ann = new ParallelANN(Array[Int](inputSize, hiddenSize, outputSize)) - ann.optimizer.setNumIterations(2000).setStepSize(2.0) - val model = ann.train(rddData) + val topology = Array[Int](inputSize, hiddenSize, outputSize) + val model = ArtificialNeuralNetwork.train(rddData, topology, 2000, 2.0, 1.0) val predictionAndLabels = rddData.map { case(input, label) => (model.predictV(input)(0), label(0)) }.collect() assert(predictionAndLabels.forall { case(p, l) => (math.round(p) - l) == 0 }) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala index a097df0c5f520..b528da19dbfb9 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala @@ -18,27 +18,27 @@ package org.apache.spark.mllib.ann +import java.text.SimpleDateFormat +import java.util.Calendar + import org.apache.spark._ -import org.apache.spark.mllib.regression._ import org.apache.spark.mllib.linalg._ -import org.apache.spark.mllib.ann._ + import scala.util.Random -import java.util.Calendar -import java.text.SimpleDateFormat object TestParallelANN { - var rand = new Random( 0 ) + var rand = new Random(0) - def generateInput2D( f: Double => Double, xmin: Double, xmax: Double, noPoints: Int ): Array[(Vector,Vector)] = - { + def generateInput2D(f: Double => Double, xmin: Double, xmax: Double, noPoints: Int): + Array[(Vector, Vector)] = { - var out = new Array[(Vector,Vector)](noPoints) + var out = new Array[(Vector, Vector)](noPoints) - for( i <- 0 to noPoints - 1 ) { - val x = xmin + rand.nextDouble()*(xmax - xmin) + for (i <- 0 to noPoints - 1) { + val x = xmin + rand.nextDouble() * (xmax - xmin) val y = f(x) - out(i) = ( Vectors.dense( x ), Vectors.dense( y ) ) + out(i) = (Vectors.dense(x), Vectors.dense(y)) } return out @@ -46,21 +46,21 @@ object TestParallelANN { } - def generateInput3D( f: (Double,Double) => Double, xmin: Double, xmax: Double, ymin: Double, ymax: Double, noPoints: Int ): Array[(Vector,Vector)] = { + def generateInput3D(f: (Double, Double) => Double, xmin: Double, xmax: Double, ymin: Double, ymax: Double, noPoints: Int): Array[(Vector, Vector)] = { - var out = new Array[(Vector,Vector)](noPoints) + var out = new Array[(Vector, Vector)](noPoints) - for( i <- 0 to noPoints - 1 ) { + for (i <- 0 to noPoints - 1) { - val x = xmin + rand.nextDouble()*(xmax - xmin) - val y = ymin + rand.nextDouble()*(ymax - ymin) - val z = f( x, y ) + val x = xmin + rand.nextDouble() * (xmax - xmin) + val y = ymin + rand.nextDouble() * (ymax - ymin) + val z = f(x, y) var arr = new Array[Double](2) arr(0) = x arr(1) = y - out(i) = ( Vectors.dense( arr ), Vectors.dense( z ) ) + out(i) = (Vectors.dense(arr), Vectors.dense(z)) } @@ -68,13 +68,13 @@ object TestParallelANN { } - def generateInput4D( f: Double => (Double,Double,Double), tmin: Double, tmax: Double, noPoints: Int ): Array[(Vector,Vector)] = { + def generateInput4D(f: Double => (Double, Double, Double), tmin: Double, tmax: Double, noPoints: Int): Array[(Vector, Vector)] = { - var out = new Array[(Vector,Vector)](noPoints) + var out = new Array[(Vector, Vector)](noPoints) - for( i <- 0 to 
noPoints - 1 ) { + for (i <- 0 to noPoints - 1) { - val t: Double = tmin + rand.nextDouble()*(tmax - tmin) + val t: Double = tmin + rand.nextDouble() * (tmax - tmin) var arr = new Array[Double](3) var F = f(t) @@ -82,59 +82,59 @@ object TestParallelANN { arr(1) = F._2 arr(2) = F._3 - out(i) = ( Vectors.dense( t ), Vectors.dense( arr ) ) + out(i) = (Vectors.dense(t), Vectors.dense(arr)) } out } - def f( T: Double ): Double = { - val y = 0.5 + Math.abs(T/5).toInt.toDouble*.15 + math.sin(T*math.Pi/10)*.1 - assert( y<= 1) + def f(T: Double): Double = { + val y = 0.5 + Math.abs(T / 5).toInt.toDouble * .15 + math.sin(T * math.Pi / 10) * .1 + assert(y <= 1) y } - def f3D( x: Double, y: Double ): Double = { - .5 + .24*Math.sin( x*2*math.Pi/10 ) + .24*Math.cos( y*2*math.Pi/10 ) + def f3D(x: Double, y: Double): Double = { + .5 +.24 * Math.sin(x * 2 * math.Pi / 10) +.24 * Math.cos(y * 2 * math.Pi / 10) } - def f4D( t: Double ): (Double, Double,Double) = { - val x = Math.abs(.8*Math.cos( t*2*math.Pi/20 ) ) + .1 - val y = (11 + t)/22 - val z = .5 + .35*Math.sin(t*2*math.Pi/5)*Math.cos( t*2*math.Pi/10 ) + .15*t/11 - ( x, y, z ) + def f4D(t: Double): (Double, Double, Double) = { + val x = Math.abs(.8 * Math.cos(t * 2 * math.Pi / 20)) + .1 + val y = (11 + t) / 22 + val z =.5 +.35 * Math.sin(t * 2 * math.Pi / 5) * Math.cos(t * 2 * math.Pi / 10) +.15 * t / 11 + (x, y, z) } - def concat( v1: Vector, v2: Vector ): Vector = { + def concat(v1: Vector, v2: Vector): Vector = { var a1 = v1.toArray var a2 = v2.toArray - var a3 = new Array[Double]( a1.size + a2.size ) + var a3 = new Array[Double](a1.size + a2.size) - for( i <- 0 to a1.size - 1 ) { + for (i <- 0 to a1.size - 1) { a3(i) = a1(i) } - for( i <- 0 to a2.size - 1 ) { + for (i <- 0 to a2.size - 1) { a3(i + a1.size) = a2(i) } - Vectors.dense( a3 ) + Vectors.dense(a3) } - def main( arg: Array[String] ) { + def main(arg: Array[String]) { - println( "Parallel ANN tester" ) + println("Parallel ANN tester") println - val formatter = new SimpleDateFormat("hh:mm:ss") + val formatter = new SimpleDateFormat("hh:mm:ss") var curAngle: Double = 0.0 var graphic: Boolean = false - if( (arg.length>0) && (arg(0)=="graph" ) ) { + if ((arg.length > 0) && (arg(0) == "graph")) { graphic = true } @@ -142,15 +142,15 @@ object TestParallelANN { var outputFrame3D: OutputFrame3D = null var outputFrame4D: OutputFrame3D = null - if( graphic ) { + if (graphic) { - outputFrame2D = new OutputFrame2D( "x -> y" ) + outputFrame2D = new OutputFrame2D("x -> y") outputFrame2D.apply - outputFrame3D = new OutputFrame3D( "(x,y) -> z", 1 ) + outputFrame3D = new OutputFrame3D("(x,y) -> z", 1) outputFrame3D.apply - outputFrame4D = new OutputFrame3D( "t -> (x,y,z)" ) + outputFrame4D = new OutputFrame3D("t -> (x,y,z)") outputFrame4D.apply } @@ -161,102 +161,95 @@ object TestParallelANN { var conf = new SparkConf().setAppName("Parallel ANN").setMaster("local[1]") var sc = new SparkContext(conf) - val testRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache - val testRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 200 ), 2).cache - val testRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache - - val validationRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache - val validationRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache - val validationRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache - - if( graphic ) { + val testRDD2D = 
sc.parallelize(generateInput2D(T => f(T), -10, 10, 100), 2).cache + val testRDD3D = sc.parallelize(generateInput3D((x, y) => f3D(x, y), -10, 10, -10, 10, 200), 2).cache + val testRDD4D = sc.parallelize(generateInput4D(t => f4D(t), -10, 10, 100), 2).cache - outputFrame2D.setData( testRDD2D.map( T => concat( T._1, T._2 ) ) ) - outputFrame3D.setData( testRDD3D.map( T => concat( T._1, T._2 ) ) ) - outputFrame4D.setData( testRDD4D.map( T => T._2 ) ) + val validationRDD2D = sc.parallelize(generateInput2D(T => f(T), -10, 10, 100), 2).cache + val validationRDD3D = sc.parallelize(generateInput3D((x, y) => f3D(x, y), -10, 10, -10, 10, 100), 2).cache + val validationRDD4D = sc.parallelize(generateInput4D(t => f4D(t), -10, 10, 100), 2).cache - } + if (graphic) { - val parallelANN2D = new ParallelANN( Array[Int]( 1, 3, 3, 1 ) ) - parallelANN2D.optimizer.setNumIterations(1000).setStepSize( 1.0 ) + outputFrame2D.setData(testRDD2D.map(T => concat(T._1, T._2))) + outputFrame3D.setData(testRDD3D.map(T => concat(T._1, T._2))) + outputFrame4D.setData(testRDD4D.map(T => T._2)) - val parallelANN3D = new ParallelANN( Array[Int]( 2, 20, 1 ) ) - parallelANN3D.optimizer.setNumIterations(1000).setStepSize( 1.0 ) + } - val parallelANN4D = new ParallelANN( Array[Int]( 1, 20, 3 ) ) - parallelANN4D.optimizer.setNumIterations( 1000 ).setStepSize( 1.0 ) - val starttime = Calendar.getInstance().getTime() - println( "Start training " + starttime ) + println("Start training " + starttime) - var model2D = parallelANN2D.train( testRDD2D ) - var model3D = parallelANN3D.train( testRDD3D ) - var model4D = parallelANN4D.train( testRDD4D ) + val numIterations = 1000 + val stepSize = 1.0 + var model2D = ArtificialNeuralNetwork.train(testRDD2D, Array[Int](1, 3, 3, 1), numIterations, stepSize) + var model3D = ArtificialNeuralNetwork.train(testRDD3D, Array[Int](2, 20, 1), numIterations, stepSize) + var model4D = ArtificialNeuralNetwork.train(testRDD4D, Array[Int](1, 20, 3), numIterations, stepSize) val noIt = 1500 - var errHist = new Array[(Int,Double,Double,Double)]( noIt ) + var errHist = new Array[(Int, Double, Double, Double)](noIt) - for( i <- 0 to noIt - 1 ) { + for (i <- 0 to noIt - 1) { - val predictedAndTarget2D = validationRDD2D.map( T => ( T._1, T._2, model2D.predictV( T._1 ) ) ) - val predictedAndTarget3D = validationRDD3D.map( T => ( T._1, T._2, model3D.predictV( T._1 ) ) ) - val predictedAndTarget4D = validationRDD4D.map( T => ( T._1, T._2, model4D.predictV( T._1 ) ) ) + val predictedAndTarget2D = validationRDD2D.map(T => (T._1, T._2, model2D.predictV(T._1))) + val predictedAndTarget3D = validationRDD3D.map(T => (T._1, T._2, model3D.predictV(T._1))) + val predictedAndTarget4D = validationRDD4D.map(T => (T._1, T._2, model4D.predictV(T._1))) - var err2D = predictedAndTarget2D.map( T => - (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) - ).reduce( (u,v) => u + v ) + var err2D = predictedAndTarget2D.map(T => + (T._3.toArray(0) - T._2.toArray(0)) * (T._3.toArray(0) - T._2.toArray(0)) + ).reduce((u, v) => u + v) - var err3D = predictedAndTarget3D.map( T => - (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) - ).reduce( (u,v) => u + v ) + var err3D = predictedAndTarget3D.map(T => + (T._3.toArray(0) - T._2.toArray(0)) * (T._3.toArray(0) - T._2.toArray(0)) + ).reduce((u, v) => u + v) - var err4D = predictedAndTarget4D.map( T => { + var err4D = predictedAndTarget4D.map(T => { val v1 = T._2.toArray val v2 = T._3.toArray - (v1(0) - v2(0))*(v1(0) - v2(0)) + - (v1(1) - v2(1))*(v1(1) - 
v2(1)) + - (v1(2) - v2(2))*(v1(2) - v2(2)) + (v1(0) - v2(0)) * (v1(0) - v2(0)) + + (v1(1) - v2(1)) * (v1(1) - v2(1)) + + (v1(2) - v2(2)) * (v1(2) - v2(2)) - } ).reduce( (u,v) => u + v ) + }).reduce((u, v) => u + v) - if( graphic ) { + if (graphic) { val predicted2D = predictedAndTarget2D.map( - T => concat( T._1, T._3 ) + T => concat(T._1, T._3) ) val predicted3D = predictedAndTarget3D.map( - T => concat( T._1, T._3 ) + T => concat(T._1, T._3) ) val predicted4D = predictedAndTarget4D.map( T => T._3 ) - curAngle = curAngle + math.Pi/4 - if( curAngle>=2*math.Pi ) { - curAngle = curAngle - 2*math.Pi + curAngle = curAngle + math.Pi / 4 + if (curAngle >= 2 * math.Pi) { + curAngle = curAngle - 2 * math.Pi } - outputFrame3D.setAngle( curAngle ) - outputFrame4D.setAngle( curAngle ) + outputFrame3D.setAngle(curAngle) + outputFrame4D.setAngle(curAngle) - outputFrame2D.setApproxPoints( predicted2D ) - outputFrame3D.setApproxPoints( predicted3D ) - outputFrame4D.setApproxPoints( predicted4D ) + outputFrame2D.setApproxPoints(predicted2D) + outputFrame3D.setApproxPoints(predicted3D) + outputFrame4D.setApproxPoints(predicted4D) } - println( "It. "+i+" ("+Calendar.getInstance().getTime()+"), Error 2D/3D/4D: " + (err2D, err3D, err4D) ) - errHist(i) = ( i, err2D, err3D, err4D ) + println("It. " + i + " (" + Calendar.getInstance().getTime() + "), Error 2D/3D/4D: " +(err2D, err3D, err4D)) + errHist(i) = (i, err2D, err3D, err4D) - if( i < noIt - 1 ) { - model2D = parallelANN2D.train( testRDD2D, model2D ) - model3D = parallelANN3D.train( testRDD3D, model3D ) - model4D = parallelANN4D.train( testRDD4D, model4D ) + if (i < noIt - 1) { + model2D = ArtificialNeuralNetwork.train(testRDD2D, model2D.topology, model2D.weights, numIterations, stepSize) + model3D = ArtificialNeuralNetwork.train(testRDD3D, model3D.topology, model3D.weights, numIterations, stepSize) + model4D = ArtificialNeuralNetwork.train(testRDD4D, model4D.topology, model4D.weights, numIterations, stepSize) } } @@ -265,11 +258,11 @@ object TestParallelANN { val stoptime = Calendar.getInstance().getTime() - for( i <- 0 to noIt - 1 ) { - println( errHist(i) ) + for (i <- 0 to noIt - 1) { + println(errHist(i)) } - println( formatter.format( starttime )+"-" + formatter.format( stoptime ) + " "+(stoptime.getTime-starttime.getTime+500)/1000+" seconds" ) + println(formatter.format(starttime) + "-" + formatter.format(stoptime) + " " + (stoptime.getTime - starttime.getTime + 500) / 1000 + " seconds") } From c0324769a37accf9c77e3d46fba14e40af111a1a Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Tue, 9 Sep 2014 19:02:20 +0400 Subject: [PATCH 031/143] Apache header --- .../org/apache/spark/mllib/ann/ANNSuite.scala | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala index 30f51a973712f..692366699a5fb 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.spark.mllib.ann import org.apache.spark.mllib.linalg.Vectors From 3e90c4da57e89cb0274f5367c9ca71f08f9716ec Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Wed, 10 Sep 2014 13:58:59 +0800 Subject: [PATCH 032/143] Update ArtificialNeuralNetwork.scala Replaced fors by whiles --- .../mllib/ann/ArtificialNeuralNetwork.scala | 194 ++++++++++++------ 1 file changed, 135 insertions(+), 59 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index 6ecbb18202817..9294c46727f5c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -77,9 +77,11 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol val tmp = new Array[Int](L + 1) var curPos = 0 tmp(0) = 0 - for (l <- 1 to L) { + var l = 1 + while(l <= L) { tmp(l) = curPos curPos = curPos + (topology(l - 1) + 1) * (topology(l)) + l = l + 1 } tmp } @@ -88,20 +90,33 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol def computeValues(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { var arrPrev = new Array[Double](topology(0)) - for (i <- 0 until topology(0)) + var i: Int = 0 + var j: Int = 0 + var l: Int = 0 + i = 0 + while(i < topology(0)) { arrPrev(i) = arrData(i) - for (l <- 1 to L) { + i = i + 1 + } + l = 1 + while(l <= L) { val arrCur = new Array[Double](topology(l)) - for (j <- 0 until topology(l)) { + j = 0 + while(j < topology(l)) { var cum: Double = 0.0 - for (i <- 0 until topology(l - 1)) + i = 0 + while( i < topology(l - 1) ) { cum = cum + arrPrev(i) * arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + i) + i = i + 1 + } cum = cum + arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + topology(l - 1)) // bias arrCur(j) = g(cum) + j = j + 1 } arrPrev = arrCur + l = l + 1 } arrPrev } @@ -126,13 +141,14 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol def predictV(testData: Vector): Vector = { predictPointV(testData, weights) } + } class ArtificialNeuralNetwork private( - private var topology: Array[Int], - private var numIterations: Int, - private var stepSize: Double, - private var miniBatchFraction: Double) + private var topology: Array[Int], + private var numIterations: Int, + private var stepSize: Double, + private var miniBatchFraction: Double) extends Serializable { private val gradient = new ANNLeastSquaresGradient(topology) @@ -158,60 +174,80 @@ class ArtificialNeuralNetwork private( object ArtificialNeuralNetwork { def train( - input: RDD[(Vector, Vector)], - topology: Array[Int], - initialWeights: Vector, - numIterations: Int, - stepSize: Double, - miniBatchFraction: Double): ArtificialNeuralNetworkModel = { + input: RDD[(Vector, Vector)], + topology: Array[Int], + initialWeights: Vector, + numIterations: Int, + stepSize: Double, + miniBatchFraction: Double): ArtificialNeuralNetworkModel = { new 
ArtificialNeuralNetwork(topology, numIterations, stepSize, miniBatchFraction) .run(input, initialWeights) } def train( - input: RDD[(Vector, Vector)], - topology: Array[Int], - initialWeights: Vector, - numIterations: Int, - stepSize: Double): ArtificialNeuralNetworkModel = { + input: RDD[(Vector, Vector)], + topology: Array[Int], + initialWeights: Vector, + numIterations: Int, + stepSize: Double): ArtificialNeuralNetworkModel = { new ArtificialNeuralNetwork(topology, numIterations, stepSize, 1.0).run(input, initialWeights) } def train( - input: RDD[(Vector, Vector)], - topology: Array[Int], - numIterations: Int, - stepSize: Double, - miniBatchFraction: Double): ArtificialNeuralNetworkModel = { + input: RDD[(Vector,Vector)], + model: ArtificialNeuralNetworkModel, + numIterations: Int, + stepSize: Double): ArtificialNeuralNetworkModel = { + train(input, model.topology, model.weights, numIterations, stepSize) + } + + + def train( + input: RDD[(Vector, Vector)], + topology: Array[Int], + numIterations: Int, + stepSize: Double, + miniBatchFraction: Double): ArtificialNeuralNetworkModel = { new ArtificialNeuralNetwork(topology, numIterations, stepSize, miniBatchFraction) .run(input, randomWeights(topology)) } def train( - input: RDD[(Vector, Vector)], - topology: Array[Int], - numIterations: Int, - stepSize: Double): ArtificialNeuralNetworkModel = { + input: RDD[(Vector, Vector)], + topology: Array[Int], + numIterations: Int, + stepSize: Double): ArtificialNeuralNetworkModel = { train(input, topology, numIterations, stepSize, 1.0) } def randomWeights(topology: Array[Int]): Vector = { val rand = new XORShiftRandom() + + var i: Int = 0 + var l: Int = 0 + val noWeights = { var tmp = 0 - for (i <- 1 until topology.size) { + var i = 1 + while(i < topology.size) { tmp = tmp + topology(i) * (topology(i - 1) + 1) + i = i + 1 } tmp } val initialWeightsArr = new Array[Double](noWeights) var pos = 0; - for (l <- 1 until topology.length) { - for (i <- 0 until (topology(l) * (topology(l - 1) + 1))) { + + l = 1 + while( l < topology.length) { + i = 0 + while(i < (topology(l) * (topology(l - 1) + 1))) { initialWeightsArr(pos) = (rand.nextDouble * 4.8 - 2.4) / (topology(l - 1) + 1) pos += 1; + i += 1 } + l += 1 } Vectors.dense(initialWeightsArr) } @@ -226,8 +262,10 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { private val noWeights = { var tmp = 0 - for (i <- 1 to L) { - tmp = tmp + topology(i) * (topology(i - 1) + 1) + var l = 1 + while(l <= L) { + tmp = tmp + topology(l) * (topology(l - 1) + 1) + l += 1 } tmp } @@ -236,17 +274,21 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { val tmp = new Array[Int](L + 1) var curPos = 0; tmp(0) = 0; - for (l <- 1 to L) { + var l = 1 + while(l <= L) { tmp(l) = curPos curPos = curPos + (topology(l - 1) + 1) * (topology(l)) + l += 1 } tmp } val noNodes: Int = { var tmp: Integer = 0 - for (l <- 0 until topology.size) { + var l = 0 + while(l < topology.size) { tmp = tmp + topology(l) + l += 1 } tmp } @@ -254,8 +296,10 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { val ofsNode: Array[Int] = { val tmp = new Array[Int](L + 1) tmp(0) = 0 - for (l <- 1 to L) { + var l = 1 + while(l <= L) { tmp(l) = tmp(l - 1) + topology(l - 1) + l += 1 } tmp } @@ -264,71 +308,102 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { val arrData = data.toArray val arrWeights = weights.toArray val arrNodes = new Array[Double](noNodes) + + var i: Int = 0 + var j: Int = 0 
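+    // loop counters hoisted out of the former for comprehensions;
+    // l indexes layers, i and j index nodes in adjacent layers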
+ var l: Int = 0 + // forward run - for (i <- 0 until topology(0)) { + i = 0; + while(i < topology(0)) { arrNodes(i) = arrData(i) + i += 1 } - for (l <- 1 to L) { - for (j <- 0 until topology(l)) { + l = 1 + while( l <= L ) { + j = 0 + while(j < topology(l)) { var cum: Double = 0.0; - for (i <- 0 until topology(l - 1)) { + i = 0 + while(i < topology(l - 1)) { cum = cum + arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + i) * arrNodes(ofsNode(l - 1) + i) + i += 1 } cum = cum + arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + topology(l - 1)) arrNodes(ofsNode(l) + j) = g(cum) + j += 1 } + l += 1 } val arrDiff = new Array[Double](topology(L)) - for (j <- 0 until topology(L)) { + j = 0 + while( j < topology(L)) { arrDiff(j) = (arrNodes(ofsNode(L) + j) - arrData(topology(0) + j)) + j += 1 } var err: Double = 0; - for (j <- 0 until topology(L)) { + j = 0 + while(j < topology(L)) { err = err + arrDiff(j) * arrDiff(j) + j += 1 } err = err * .5 // back propagation val arrDelta = new Array[Double](noNodes) - for (j <- 0 until topology(L)) { + j = 0 + while(j < topology(L)) { arrDelta(ofsNode(L) + j) = arrDiff(j) * arrNodes(ofsNode(L) + j) * (1 - arrNodes(ofsNode(L) + j)) + j += 1 } - for (l <- L - 1 until 0 by -1) { - for (j <- 0 until topology(l)) { + l = L - 1 + while(l > 0) { + j = 0 + while(j < topology(l)) { var cum: Double = 0.0 - for (i <- 0 until topology(l + 1)) { + i = 0 + while( i < topology(l + 1)) { cum = cum + arrWeights(ofsWeight(l + 1) + (topology(l) + 1) * i + j) * arrDelta(ofsNode(l + 1) + i) * arrNodes(ofsNode(l) + j) * (1 - arrNodes(ofsNode(l) + j)) + i += 1 } arrDelta(ofsNode(l) + j) = cum + j += 1 } + l -= 1 } // gradient val arrGrad = new Array[Double](noWeights) - for (l <- 1 to L) { - for (j <- 0 until topology(l)) { - for (i <- 0 until topology(l - 1)) { + l = 1 + while(l <= L) { + j = 0 + while(j < topology(l)) { + i = 0 + while(i < topology(l - 1)) { arrGrad(ofsWeight(l) + (topology(l - 1) + 1) * j + i) = arrNodes(ofsNode(l - 1) + i) * arrDelta(ofsNode(l) + j) + i += 1 } arrGrad(ofsWeight(l) + (topology(l - 1) + 1) * j + topology(l - 1)) = arrDelta(ofsNode(l) + j) + j += 1 } + l += 1 } (Vectors.dense(arrGrad), err) } override def compute( - data: Vector, - label: Double, - weights: Vector, - cumGradient: Vector): Double = { + data: Vector, + label: Double, + weights: Vector, + cumGradient: Vector): Double = { val (grad, err) = compute(data, label, weights) cumGradient.toBreeze += grad.toBreeze return err @@ -338,14 +413,15 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { private class ANNUpdater extends Updater { override def compute( - weightsOld: Vector, - gradient: Vector, - stepSize: Double, - iter: Int, - regParam: Double): (Vector, Double) = { + weightsOld: Vector, + gradient: Vector, + stepSize: Double, + iter: Int, + regParam: Double): (Vector, Double) = { val thisIterStepSize = stepSize val brzWeights: BV[Double] = weightsOld.toBreeze.toDenseVector brzAxpy(-thisIterStepSize, gradient.toBreeze, brzWeights) (Vectors.fromBreeze(brzWeights), 0) } + } From 71ca72734d7d7ec3c8e2447a1861f1ca72e467c4 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Wed, 10 Sep 2014 14:01:46 +0800 Subject: [PATCH 033/143] Update and rename TestParallelANN.scala to TestANN.scala Adapted test to new interface --- .../org/apache/spark/mllib/ann/TestANN.scala | 581 ++++++++++++++++++ .../spark/mllib/ann/TestParallelANN.scala | 269 -------- 2 files changed, 581 insertions(+), 269 deletions(-) create mode 100644 
mllib/src/test/scala/org/apache/spark/mllib/ann/TestANN.scala delete mode 100644 mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestANN.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestANN.scala new file mode 100644 index 0000000000000..9e6f59df3a11e --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestANN.scala @@ -0,0 +1,581 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package org.apache.spark.mllib.ann + +import java.awt._ +import java.awt.event._ +import java.text.SimpleDateFormat +import java.util.Calendar +import org.apache.spark._ +import org.apache.spark.mllib.ann._ +import org.apache.spark.mllib.linalg._ +import org.apache.spark.mllib.regression._ +import org.apache.spark.rdd.RDD +import scala.Array.canBuildFrom +import scala.util.Random + +object windowAdapter extends WindowAdapter { + + override def windowClosing( e: WindowEvent ) { + System.exit(0) + } + +} + +class OutputCanvas2D( wd: Int, ht: Int ) extends Canvas { + + var points: Array[Vector] = null + var approxPoints: Array[Vector] = null + + /* input: rdd of (x,y) vectors */ + def setData( rdd: RDD[Vector] ) { + points = rdd.collect + repaint + } + + def setApproxPoints( rdd: RDD[Vector] ) { + approxPoints = rdd.collect + repaint + } + + def plotDot( g: Graphics, x: Int, y: Int ) { + val r = 5 + val noSamp = 6*r + var x1 = x + var y1 = y + r + for( j <- 1 to noSamp ) { + val x2 = (x.toDouble + math.sin( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt + val y2 = (y.toDouble + math.cos( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt + g.drawLine( x1, ht - y1, x2, ht - y2 ) + x1 = x2 + y1 = y2 + } + } + + override def paint( g: Graphics) = { + + var xmax: Double = 0.0 + var xmin: Double = 0.0 + var ymax: Double = 0.0 + var ymin: Double = 0.0 + + if( points!=null ) { + + g.setColor( Color.black ) + val x = points.map( T => (T.toArray)(0) ) + val y = points.map( T => (T.toArray)(1) ) + + xmax = x.max + xmin = x.min + ymax = y.max + ymin = y.min + + for( i <- 0 to x.size - 1 ) { + + val xr = (((x(i).toDouble - xmin)/(xmax - xmin))*wd + .5).toInt + val yr = (((y(i).toDouble - ymin)/(ymax - ymin))*ht + .5).toInt + plotDot( g, xr, yr ) + + } + + if( approxPoints != null ) { + + g.setColor( Color.red ) + val x = approxPoints.map( T => (T.toArray)(0) ) + val y = approxPoints.map( T => (T.toArray)(1) ) + + for( i <- 0 to x.size-1 ) { + val xr = (((x(i).toDouble - xmin)/(xmax - xmin))*wd + .5).toInt + val yr = (((y(i).toDouble - ymin)/(ymax - ymin))*ht + .5).toInt + plotDot( g, xr, yr ) + } + + } + + } + + } + +} + +class OutputFrame2D( title: String ) extends Frame( title ) { + + val wd = 800 + val ht = 600 + + var outputCanvas = new OutputCanvas2D( wd, ht ) + + def 
apply() { + addWindowListener( windowAdapter ) + setSize( wd, ht ) + add( "Center", outputCanvas ) + show() + } + + def setData( rdd: RDD[Vector] ) { + outputCanvas.setData( rdd ) + } + + def setApproxPoints( rdd: RDD[Vector] ) { + outputCanvas.setApproxPoints( rdd ) + } + + +} + +object windowAdapter3D extends WindowAdapter { + + override def windowClosing( e: WindowEvent ) { + System.exit(0) + } + +} + +class OutputCanvas3D( wd: Int, ht: Int, shadowFrac: Double ) extends Canvas { + + var angle: Double = 0 + var points: Array[Vector] = null + var approxPoints: Array[Vector] = null + + /* 3 dimensional (x,y,z) vector */ + def setData( rdd: RDD[Vector] ) { + points = rdd.collect + repaint + } + + def setApproxPoints( rdd: RDD[Vector] ) { + approxPoints = rdd.collect + repaint + } + + def plotDot( g: Graphics, x: Int, y: Int ) { + val r = 5 + val noSamp = 6*r + var x1 = x + var y1 = y + r + for( j <- 1 to noSamp ) { + val x2 = (x.toDouble + math.sin( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt + val y2 = (y.toDouble + math.cos( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt + g.drawLine( x1, ht - y1, x2, ht - y2 ) + x1 = x2 + y1 = y2 + } + } + + def plotLine( g: Graphics, x1: Int, y1: Int, x2: Int, y2: Int ) { + g.drawLine( x1, ht - y1, x2, ht - y2 ) + } + + def calcCord( arr: Array[Double], angle: Double ): (Double, Double, Double, Double, Double, Double) = { + + var arrOut = new Array[Double](6) + + val x = arr(0)*math.cos( angle ) - arr(1)*math.sin( angle ) + val y = arr(0)*math.sin( angle ) + arr(1)*math.cos( angle ) + val z = arr(2) + + val x0 = arr(0)*math.cos( angle ) - arr(1)*math.sin( angle ) + val y0 = arr(0)*math.sin( angle ) + arr(1)*math.cos( angle ) + val z0 = 0 + + val xs = (arr(0) + shadowFrac*arr(2))*math.cos( angle ) - arr(1)*math.sin( angle ) + val ys = (arr(0) + shadowFrac*arr(2))*math.sin( angle ) + arr(1)*math.cos( angle ) + val zs = 0 + + arrOut(0) = y - .5*x + arrOut(1) = z - .25*x + + arrOut(2) = y0 - .5*x0 + arrOut(3) = z0 - .25*x0 + + arrOut(4) = ys - .5*xs + arrOut(5) = zs - .25*xs + + ( arrOut(0), arrOut(1), arrOut(2), arrOut(3), arrOut(4), arrOut(5) ) + + } + + override def paint( g: Graphics) = { + + if( points!=null ) { + + var p = points.map( T => calcCord( T.toArray, angle ) ).toArray + + var xmax = p(0)._1 + var xmin = p(0)._1 + var ymax = p(0)._2 + var ymin = p(0)._2 + + for( i <- 0 to p.size-1 ) { + + if( xmaxp(i)._1 ) { + xmin = p(i)._1 + } + if( xmin>p(i)._3 ) { + xmin = p(i)._3 + } + if( xmin>p(i)._5 ) { + xmin = p(i)._5 + } + + if( ymaxp(i)._2 ) { + ymin = p(i)._2 + } + if( ymin>p(i)._4 ) { + ymin = p(i)._4 + } + if( ymin>p(i)._6 ) { + ymin = p(i)._6 + } + + } + + for( i <- 0 to p.size-1 ) { + + var x_ = (((p(i)._1 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt + var y_ = (((p(i)._2 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt + var x0 = (((p(i)._3 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt + var y0 = (((p(i)._4 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt + var xs = (((p(i)._5 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt + var ys = (((p(i)._6 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt + + g.setColor( Color.black ) + plotDot( g, x_, y_ ) + plotLine( g, x_, y_, x0, y0 ) + g.setColor( Color.gray ) + plotLine( g, x0, y0, xs, ys ) + + } + + if( approxPoints != null ) { + + var p = approxPoints.map( T => calcCord( T.toArray, angle ) ) + + for( i <- 0 to p.size-1 ) { + + var x_ = (((p(i)._1 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt + var y_ = (((p(i)._2 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt + var x0 = (((p(i)._3 - xmin)/(xmax - 
xmin))*(wd - 40) + 20.5).toInt + var y0 = (((p(i)._4 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt + var xs = (((p(i)._5 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt + var ys = (((p(i)._6 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt + + g.setColor( Color.red ) + plotDot( g, x_, y_ ) + plotLine( g, x_, y_, x0, y0 ) + g.setColor( Color.magenta ) + plotLine( g, x0, y0, xs, ys ) + + } + + } + + } + } +} + +class OutputFrame3D( title: String, shadowFrac: Double ) extends Frame( title ) { + + val wd = 800 + val ht = 600 + + def this( title: String ) = this( title, .25 ) + + var outputCanvas = new OutputCanvas3D( wd, ht, shadowFrac ) + + def apply() { + addWindowListener( windowAdapter3D ) + setSize( wd, ht ) + add( "Center", outputCanvas ) + show() + } + + def setData( rdd: RDD[Vector] ) { + outputCanvas.setData( rdd ) + } + + def setAngle( angle: Double ) { + outputCanvas.angle = angle + } + + def setApproxPoints( rdd: RDD[Vector] ) { + outputCanvas.setApproxPoints( rdd ) + } + +} + +object TestANN { + + var rand = new Random( 0 ) + + def generateInput2D( f: Double => Double, xmin: Double, xmax: Double, noPoints: Int ): Array[(Vector,Vector)] = + { + + var out = new Array[(Vector,Vector)](noPoints) + + for( i <- 0 to noPoints - 1 ) { + val x = xmin + rand.nextDouble()*(xmax - xmin) + val y = f(x) + out(i) = ( Vectors.dense( x ), Vectors.dense( y ) ) + } + + return out + + } + + + def generateInput3D( f: (Double,Double) => Double, xmin: Double, xmax: Double, ymin: Double, ymax: Double, noPoints: Int ): Array[(Vector,Vector)] = { + + var out = new Array[(Vector,Vector)](noPoints) + + for( i <- 0 to noPoints - 1 ) { + + val x = xmin + rand.nextDouble()*(xmax - xmin) + val y = ymin + rand.nextDouble()*(ymax - ymin) + val z = f( x, y ) + + var arr = new Array[Double](2) + + arr(0) = x + arr(1) = y + out(i) = ( Vectors.dense( arr ), Vectors.dense( z ) ) + + } + + out + + } + + def generateInput4D( f: Double => (Double,Double,Double), tmin: Double, tmax: Double, noPoints: Int ): Array[(Vector,Vector)] = { + + var out = new Array[(Vector,Vector)](noPoints) + + for( i <- 0 to noPoints - 1 ) { + + val t: Double = tmin + rand.nextDouble()*(tmax - tmin) + var arr = new Array[Double](3) + var F = f(t) + + arr(0) = F._1 + arr(1) = F._2 + arr(2) = F._3 + + out(i) = ( Vectors.dense( t ), Vectors.dense( arr ) ) + } + + out + + } + + def f( T: Double ): Double = { + val y = 0.5 + Math.abs(T/5).toInt.toDouble*.15 + math.sin(T*math.Pi/10)*.1 + assert( y<= 1) + y + } + + def f3D( x: Double, y: Double ): Double = { + .5 + .24*Math.sin( x*2*math.Pi/10 ) + .24*Math.cos( y*2*math.Pi/10 ) + } + + def f4D( t: Double ): (Double, Double,Double) = { + val x = Math.abs(.8*Math.cos( t*2*math.Pi/20 ) ) + .1 + val y = (11 + t)/22 + val z = .5 + .35*Math.sin(t*2*math.Pi/5)*Math.cos( t*2*math.Pi/10 ) + .15*t/11 + ( x, y, z ) + } + + def concat( v1: Vector, v2: Vector ): Vector = { + + var a1 = v1.toArray + var a2 = v2.toArray + var a3 = new Array[Double]( a1.size + a2.size ) + + for( i <- 0 to a1.size - 1 ) { + a3(i) = a1(i) + } + + for( i <- 0 to a2.size - 1 ) { + a3(i + a1.size) = a2(i) + } + + Vectors.dense( a3 ) + + } + + def main( arg: Array[String] ) { + + println( "ANN tester" ) + println + + val formatter = new SimpleDateFormat("hh:mm:ss") + + var curAngle: Double = 0.0 + var graphic: Boolean = false + + if( (arg.length>0) && (arg(0)=="graph" ) ) { + graphic = true + } + + var outputFrame2D: OutputFrame2D = null + var outputFrame3D: OutputFrame3D = null + var outputFrame4D: OutputFrame3D = null + + if( graphic ) { + 
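+      // graphic mode overlays the model predictions (red) on the sampled
+      // target points (black); predictions arrive later via setApproxPoints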
+ outputFrame2D = new OutputFrame2D( "x -> y" ) + outputFrame2D.apply + + outputFrame3D = new OutputFrame3D( "(x,y) -> z", 1 ) + outputFrame3D.apply + + outputFrame4D = new OutputFrame3D( "t -> (x,y,z)" ) + outputFrame4D.apply + + } + + var A = 20.0 + var B = 50.0 + + var conf = new SparkConf().setAppName("Parallel ANN").setMaster("local[1]") + var sc = new SparkContext(conf) + + val testRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache + val testRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 200 ), 2).cache + val testRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache + + val validationRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache + val validationRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache + val validationRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache + + if( graphic ) { + + outputFrame2D.setData( testRDD2D.map( T => concat( T._1, T._2 ) ) ) + outputFrame3D.setData( testRDD3D.map( T => concat( T._1, T._2 ) ) ) + outputFrame4D.setData( testRDD4D.map( T => T._2 ) ) + + } + + val starttime = Calendar.getInstance().getTime() + println( "Start training " + starttime ) + + var model2D = ArtificialNeuralNetwork.train( testRDD2D, Array[Int](1, 3, 3, 1), 1000, 1.0) + var model3D = ArtificialNeuralNetwork.train( testRDD3D, Array[Int](2, 20, 1), 1000, 1.0) + var model4D = ArtificialNeuralNetwork.train( testRDD4D, Array[Int](1, 20, 3), 1000, 1.0 ) + + val noIt = 1500 + var errHist = new Array[(Int,Double,Double,Double)]( noIt ) + + for( i <- 0 to noIt - 1 ) { + + val predictedAndTarget2D = validationRDD2D.map( T => ( T._1, T._2, model2D.predictV( T._1 ) ) ) + val predictedAndTarget3D = validationRDD3D.map( T => ( T._1, T._2, model3D.predictV( T._1 ) ) ) + val predictedAndTarget4D = validationRDD4D.map( T => ( T._1, T._2, model4D.predictV( T._1 ) ) ) + + var err2D = predictedAndTarget2D.map( T => + (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) + ).reduce( (u,v) => u + v ) + + var err3D = predictedAndTarget3D.map( T => + (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) + ).reduce( (u,v) => u + v ) + + var err4D = predictedAndTarget4D.map( T => { + + val v1 = T._2.toArray + val v2 = T._3.toArray + + (v1(0) - v2(0))*(v1(0) - v2(0)) + + (v1(1) - v2(1))*(v1(1) - v2(1)) + + (v1(2) - v2(2))*(v1(2) - v2(2)) + + } ).reduce( (u,v) => u + v ) + + + if( graphic ) { + + val predicted2D = predictedAndTarget2D.map( + T => concat( T._1, T._3 ) + ) + + val predicted3D = predictedAndTarget3D.map( + T => concat( T._1, T._3 ) + ) + + val predicted4D = predictedAndTarget4D.map( + T => T._3 + ) + + curAngle = curAngle + math.Pi/4 + if( curAngle>=2*math.Pi ) { + curAngle = curAngle - 2*math.Pi + } + + outputFrame3D.setAngle( curAngle ) + outputFrame4D.setAngle( curAngle ) + + outputFrame2D.setApproxPoints( predicted2D ) + outputFrame3D.setApproxPoints( predicted3D ) + outputFrame4D.setApproxPoints( predicted4D ) + + } + + println( "It. 
"+i+" ("+Calendar.getInstance().getTime()+"), Error 2D/3D/4D: " + (err2D, err3D, err4D) ) + errHist(i) = ( i, err2D, err3D, err4D ) + + if( i < noIt - 1 ) { + model2D = ArtificialNeuralNetwork.train(testRDD2D, model2D, 1000, 1.0) + model3D = ArtificialNeuralNetwork.train(testRDD3D, model3D, 1000, 1.0) + model4D = ArtificialNeuralNetwork.train(testRDD4D, model4D, 1000, 1.0) + } + + } + + sc.stop + + val stoptime = Calendar.getInstance().getTime() + + for( i <- 0 to noIt - 1 ) { + println( errHist(i) ) + } + + println( formatter.format( starttime )+"-" + formatter.format( stoptime ) + " "+(stoptime.getTime-starttime.getTime+500)/1000+" seconds" ) + + } + +} diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala deleted file mode 100644 index b528da19dbfb9..0000000000000 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -package org.apache.spark.mllib.ann - -import java.text.SimpleDateFormat -import java.util.Calendar - -import org.apache.spark._ -import org.apache.spark.mllib.linalg._ - -import scala.util.Random - -object TestParallelANN { - - var rand = new Random(0) - - def generateInput2D(f: Double => Double, xmin: Double, xmax: Double, noPoints: Int): - Array[(Vector, Vector)] = { - - var out = new Array[(Vector, Vector)](noPoints) - - for (i <- 0 to noPoints - 1) { - val x = xmin + rand.nextDouble() * (xmax - xmin) - val y = f(x) - out(i) = (Vectors.dense(x), Vectors.dense(y)) - } - - return out - - } - - - def generateInput3D(f: (Double, Double) => Double, xmin: Double, xmax: Double, ymin: Double, ymax: Double, noPoints: Int): Array[(Vector, Vector)] = { - - var out = new Array[(Vector, Vector)](noPoints) - - for (i <- 0 to noPoints - 1) { - - val x = xmin + rand.nextDouble() * (xmax - xmin) - val y = ymin + rand.nextDouble() * (ymax - ymin) - val z = f(x, y) - - var arr = new Array[Double](2) - - arr(0) = x - arr(1) = y - out(i) = (Vectors.dense(arr), Vectors.dense(z)) - - } - - out - - } - - def generateInput4D(f: Double => (Double, Double, Double), tmin: Double, tmax: Double, noPoints: Int): Array[(Vector, Vector)] = { - - var out = new Array[(Vector, Vector)](noPoints) - - for (i <- 0 to noPoints - 1) { - - val t: Double = tmin + rand.nextDouble() * (tmax - tmin) - var arr = new Array[Double](3) - var F = f(t) - - arr(0) = F._1 - arr(1) = F._2 - arr(2) = F._3 - - out(i) = (Vectors.dense(t), Vectors.dense(arr)) - } - - out - - } - - def f(T: Double): Double = { - val y = 0.5 + Math.abs(T / 5).toInt.toDouble * .15 + math.sin(T * math.Pi / 10) * .1 - assert(y <= 1) - y - } - - def f3D(x: Double, y: Double): Double = { - .5 +.24 * Math.sin(x * 2 * math.Pi / 10) +.24 * Math.cos(y * 2 * math.Pi / 10) - } - - def f4D(t: Double): (Double, Double, Double) = { - val x = Math.abs(.8 * Math.cos(t * 2 * math.Pi / 20)) + .1 - val y = (11 + t) / 22 - val z =.5 +.35 * Math.sin(t * 2 * math.Pi / 5) * Math.cos(t * 2 * math.Pi / 10) +.15 * t / 11 - (x, y, z) - } - - def concat(v1: Vector, v2: Vector): Vector = { - - var a1 = v1.toArray - var a2 = v2.toArray - var a3 = new Array[Double](a1.size + a2.size) - - for (i <- 0 to a1.size - 1) { - a3(i) = a1(i) - } - - for (i <- 0 to a2.size - 1) { - a3(i + a1.size) = a2(i) - } - - Vectors.dense(a3) - - } - - def main(arg: Array[String]) { - - println("Parallel ANN tester") - println - - val formatter = new SimpleDateFormat("hh:mm:ss") - - var curAngle: Double = 0.0 - var graphic: Boolean = false - - if ((arg.length > 0) && (arg(0) == "graph")) { - graphic = true - } - - var outputFrame2D: OutputFrame2D = null - var outputFrame3D: OutputFrame3D = null - var outputFrame4D: OutputFrame3D = null - - if (graphic) { - - outputFrame2D = new OutputFrame2D("x -> y") - outputFrame2D.apply - - outputFrame3D = new OutputFrame3D("(x,y) -> z", 1) - outputFrame3D.apply - - outputFrame4D = new OutputFrame3D("t -> (x,y,z)") - outputFrame4D.apply - - } - - var A = 20.0 - var B = 50.0 - - var conf = new SparkConf().setAppName("Parallel ANN").setMaster("local[1]") - var sc = new SparkContext(conf) - - val testRDD2D = sc.parallelize(generateInput2D(T => f(T), -10, 10, 100), 2).cache - val testRDD3D = sc.parallelize(generateInput3D((x, y) => f3D(x, y), -10, 10, -10, 10, 200), 2).cache - val testRDD4D = sc.parallelize(generateInput4D(t => f4D(t), -10, 10, 100), 2).cache - - val validationRDD2D = sc.parallelize(generateInput2D(T => f(T), -10, 10, 100), 2).cache - val validationRDD3D = 
sc.parallelize(generateInput3D((x, y) => f3D(x, y), -10, 10, -10, 10, 100), 2).cache - val validationRDD4D = sc.parallelize(generateInput4D(t => f4D(t), -10, 10, 100), 2).cache - - if (graphic) { - - outputFrame2D.setData(testRDD2D.map(T => concat(T._1, T._2))) - outputFrame3D.setData(testRDD3D.map(T => concat(T._1, T._2))) - outputFrame4D.setData(testRDD4D.map(T => T._2)) - - } - - val starttime = Calendar.getInstance().getTime() - println("Start training " + starttime) - - val numIterations = 1000 - val stepSize = 1.0 - var model2D = ArtificialNeuralNetwork.train(testRDD2D, Array[Int](1, 3, 3, 1), numIterations, stepSize) - var model3D = ArtificialNeuralNetwork.train(testRDD3D, Array[Int](2, 20, 1), numIterations, stepSize) - var model4D = ArtificialNeuralNetwork.train(testRDD4D, Array[Int](1, 20, 3), numIterations, stepSize) - - val noIt = 1500 - var errHist = new Array[(Int, Double, Double, Double)](noIt) - - for (i <- 0 to noIt - 1) { - - val predictedAndTarget2D = validationRDD2D.map(T => (T._1, T._2, model2D.predictV(T._1))) - val predictedAndTarget3D = validationRDD3D.map(T => (T._1, T._2, model3D.predictV(T._1))) - val predictedAndTarget4D = validationRDD4D.map(T => (T._1, T._2, model4D.predictV(T._1))) - - var err2D = predictedAndTarget2D.map(T => - (T._3.toArray(0) - T._2.toArray(0)) * (T._3.toArray(0) - T._2.toArray(0)) - ).reduce((u, v) => u + v) - - var err3D = predictedAndTarget3D.map(T => - (T._3.toArray(0) - T._2.toArray(0)) * (T._3.toArray(0) - T._2.toArray(0)) - ).reduce((u, v) => u + v) - - var err4D = predictedAndTarget4D.map(T => { - - val v1 = T._2.toArray - val v2 = T._3.toArray - - (v1(0) - v2(0)) * (v1(0) - v2(0)) + - (v1(1) - v2(1)) * (v1(1) - v2(1)) + - (v1(2) - v2(2)) * (v1(2) - v2(2)) - - }).reduce((u, v) => u + v) - - - if (graphic) { - - val predicted2D = predictedAndTarget2D.map( - T => concat(T._1, T._3) - ) - - val predicted3D = predictedAndTarget3D.map( - T => concat(T._1, T._3) - ) - - val predicted4D = predictedAndTarget4D.map( - T => T._3 - ) - - curAngle = curAngle + math.Pi / 4 - if (curAngle >= 2 * math.Pi) { - curAngle = curAngle - 2 * math.Pi - } - - outputFrame3D.setAngle(curAngle) - outputFrame4D.setAngle(curAngle) - - outputFrame2D.setApproxPoints(predicted2D) - outputFrame3D.setApproxPoints(predicted3D) - outputFrame4D.setApproxPoints(predicted4D) - - } - - println("It. 
" + i + " (" + Calendar.getInstance().getTime() + "), Error 2D/3D/4D: " +(err2D, err3D, err4D)) - errHist(i) = (i, err2D, err3D, err4D) - - if (i < noIt - 1) { - model2D = ArtificialNeuralNetwork.train(testRDD2D, model2D.topology, model2D.weights, numIterations, stepSize) - model3D = ArtificialNeuralNetwork.train(testRDD3D, model3D.topology, model3D.weights, numIterations, stepSize) - model4D = ArtificialNeuralNetwork.train(testRDD4D, model4D.topology, model4D.weights, numIterations, stepSize) - } - - } - - sc.stop - - val stoptime = Calendar.getInstance().getTime() - - for (i <- 0 to noIt - 1) { - println(errHist(i)) - } - - println(formatter.format(starttime) + "-" + formatter.format(stoptime) + " " + (stoptime.getTime - starttime.getTime + 500) / 1000 + " seconds") - - } - -} From 293d0137b31fe7fbc7cc6fafcc88fcc7d9412a20 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Wed, 10 Sep 2014 14:04:56 +0800 Subject: [PATCH 034/143] Delete TestParallelANNgraphics.scala Combined with TestANN --- .../mllib/ann/TestParallelANNgraphics.scala | 334 ------------------ 1 file changed, 334 deletions(-) delete mode 100644 mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala deleted file mode 100644 index e206a8b7072a3..0000000000000 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala +++ /dev/null @@ -1,334 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.mllib.ann - -import java.awt._ -import java.awt.event._ -import org.apache.spark.rdd.RDD -import org.apache.spark.mllib.linalg.Vector -import scala.Array.canBuildFrom - -object windowAdapter extends WindowAdapter { - - override def windowClosing( e: WindowEvent ) { - System.exit(0) - } - -} - -class OutputCanvas2D( wd: Int, ht: Int ) extends Canvas { - - var points: Array[Vector] = null - var approxPoints: Array[Vector] = null - - /* input: rdd of (x,y) vectors */ - def setData( rdd: RDD[Vector] ) { - points = rdd.collect - repaint - } - - def setApproxPoints( rdd: RDD[Vector] ) { - approxPoints = rdd.collect - repaint - } - - def plotDot( g: Graphics, x: Int, y: Int ) { - val r = 5 - val noSamp = 6*r - var x1 = x - var y1 = y + r - for( j <- 1 to noSamp ) { - val x2 = (x.toDouble + math.sin( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt - val y2 = (y.toDouble + math.cos( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt - g.drawLine( x1, ht - y1, x2, ht - y2 ) - x1 = x2 - y1 = y2 - } - } - - override def paint( g: Graphics) = { - - var xmax: Double = 0.0 - var xmin: Double = 0.0 - var ymax: Double = 0.0 - var ymin: Double = 0.0 - - if( points!=null ) { - - g.setColor( Color.black ) - val x = points.map( T => (T.toArray)(0) ) - val y = points.map( T => (T.toArray)(1) ) - - xmax = x.max - xmin = x.min - ymax = y.max - ymin = y.min - - for( i <- 0 to x.size - 1 ) { - - val xr = (((x(i).toDouble - xmin)/(xmax - xmin))*wd + .5).toInt - val yr = (((y(i).toDouble - ymin)/(ymax - ymin))*ht + .5).toInt - plotDot( g, xr, yr ) - - } - - if( approxPoints != null ) { - - g.setColor( Color.red ) - val x = approxPoints.map( T => (T.toArray)(0) ) - val y = approxPoints.map( T => (T.toArray)(1) ) - - for( i <- 0 to x.size-1 ) { - val xr = (((x(i).toDouble - xmin)/(xmax - xmin))*wd + .5).toInt - val yr = (((y(i).toDouble - ymin)/(ymax - ymin))*ht + .5).toInt - plotDot( g, xr, yr ) - } - - } - - } - - } - -} - -class OutputFrame2D( title: String ) extends Frame( title ) { - - val wd = 800 - val ht = 600 - - var outputCanvas = new OutputCanvas2D( wd, ht ) - - def apply() { - addWindowListener( windowAdapter ) - setSize( wd, ht ) - add( "Center", outputCanvas ) - show() - } - - def setData( rdd: RDD[Vector] ) { - outputCanvas.setData( rdd ) - } - - def setApproxPoints( rdd: RDD[Vector] ) { - outputCanvas.setApproxPoints( rdd ) - } - - -} - -object windowAdapter3D extends WindowAdapter { - - override def windowClosing( e: WindowEvent ) { - System.exit(0) - } - -} - -class OutputCanvas3D( wd: Int, ht: Int, shadowFrac: Double ) extends Canvas { - - var angle: Double = 0 - var points: Array[Vector] = null - var approxPoints: Array[Vector] = null - - /* 3 dimensional (x,y,z) vector */ - def setData( rdd: RDD[Vector] ) { - points = rdd.collect - repaint - } - - def setApproxPoints( rdd: RDD[Vector] ) { - approxPoints = rdd.collect - repaint - } - - def plotDot( g: Graphics, x: Int, y: Int ) { - val r = 5 - val noSamp = 6*r - var x1 = x - var y1 = y + r - for( j <- 1 to noSamp ) { - val x2 = (x.toDouble + math.sin( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt - val y2 = (y.toDouble + math.cos( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt - g.drawLine( x1, ht - y1, x2, ht - y2 ) - x1 = x2 - y1 = y2 - } - } - - def plotLine( g: Graphics, x1: Int, y1: Int, x2: Int, y2: Int ) { - g.drawLine( x1, ht - y1, x2, ht - y2 ) - } - - def calcCord( arr: Array[Double], angle: Double ): (Double, Double, Double, Double, Double, Double) = { - - var arrOut = new Array[Double](6) - - val x = arr(0)*math.cos( 
angle ) - arr(1)*math.sin( angle )
-    val y = arr(0)*math.sin( angle ) + arr(1)*math.cos( angle )
-    val z = arr(2)
-
-    val x0 = arr(0)*math.cos( angle ) - arr(1)*math.sin( angle )
-    val y0 = arr(0)*math.sin( angle ) + arr(1)*math.cos( angle )
-    val z0 = 0
-
-    val xs = (arr(0) + shadowFrac*arr(2))*math.cos( angle ) - arr(1)*math.sin( angle )
-    val ys = (arr(0) + shadowFrac*arr(2))*math.sin( angle ) + arr(1)*math.cos( angle )
-    val zs = 0
-
-    arrOut(0) = y - .5*x
-    arrOut(1) = z - .25*x
-
-    arrOut(2) = y0 - .5*x0
-    arrOut(3) = z0 - .25*x0
-
-    arrOut(4) = ys - .5*xs
-    arrOut(5) = zs - .25*xs
-
-    ( arrOut(0), arrOut(1), arrOut(2), arrOut(3), arrOut(4), arrOut(5) )
-
-  }
-
-  override def paint( g: Graphics) = {
-
-    if( points!=null ) {
-
-      var p = points.map( T => calcCord( T.toArray, angle ) ).toArray
-
-      var xmax = p(0)._1
-      var xmin = p(0)._1
-      var ymax = p(0)._2
-      var ymin = p(0)._2
-
-      for( i <- 0 to p.size-1 ) {
-
-        if( xmax<p(i)._1 ) {
-          xmax = p(i)._1
-        }
-        if( xmax<p(i)._3 ) {
-          xmax = p(i)._3
-        }
-        if( xmax<p(i)._5 ) {
-          xmax = p(i)._5
-        }
-        if( xmin>p(i)._1 ) {
-          xmin = p(i)._1
-        }
-        if( xmin>p(i)._3 ) {
-          xmin = p(i)._3
-        }
-        if( xmin>p(i)._5 ) {
-          xmin = p(i)._5
-        }
-
-        if( ymax<p(i)._2 ) {
-          ymax = p(i)._2
-        }
-        if( ymax<p(i)._4 ) {
-          ymax = p(i)._4
-        }
-        if( ymax<p(i)._6 ) {
-          ymax = p(i)._6
-        }
-        if( ymin>p(i)._2 ) {
-          ymin = p(i)._2
-        }
-        if( ymin>p(i)._4 ) {
-          ymin = p(i)._4
-        }
-        if( ymin>p(i)._6 ) {
-          ymin = p(i)._6
-        }
-
-      }
-
-      for( i <- 0 to p.size-1 ) {
-
-        var x_ = (((p(i)._1 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt
-        var y_ = (((p(i)._2 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt
-        var x0 = (((p(i)._3 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt
-        var y0 = (((p(i)._4 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt
-        var xs = (((p(i)._5 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt
-        var ys = (((p(i)._6 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt
-
-        g.setColor( Color.black )
-        plotDot( g, x_, y_ )
-        plotLine( g, x_, y_, x0, y0 )
-        g.setColor( Color.gray )
-        plotLine( g, x0, y0, xs, ys )
-
-      }
-
-      if( approxPoints != null ) {
-
-        var p = approxPoints.map( T => calcCord( T.toArray, angle ) )
-
-        for( i <- 0 to p.size-1 ) {
-
-          var x_ = (((p(i)._1 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt
-          var y_ = (((p(i)._2 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt
-          var x0 = (((p(i)._3 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt
-          var y0 = (((p(i)._4 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt
-          var xs = (((p(i)._5 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt
-          var ys = (((p(i)._6 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt
-
-          g.setColor( Color.red )
-          plotDot( g, x_, y_ )
-          plotLine( g, x_, y_, x0, y0 )
-          g.setColor( Color.magenta )
-          plotLine( g, x0, y0, xs, ys )
-
-        }
-
-      }
-
-    }
-  }
-}
-
-class OutputFrame3D( title: String, shadowFrac: Double ) extends Frame( title ) {
-
-  val wd = 800
-  val ht = 600
-
-  def this( title: String ) = this( title, .25 )
-
-  var outputCanvas = new OutputCanvas3D( wd, ht, shadowFrac )
-
-  def apply() {
-    addWindowListener( windowAdapter3D )
-    setSize( wd, ht )
-    add( "Center", outputCanvas )
-    show()
-  }
-
-  def setData( rdd: RDD[Vector] ) {
-    outputCanvas.setData( rdd )
-  }
-
-  def setAngle( angle: Double ) {
-    outputCanvas.angle = angle
-  }
-
-  def setApproxPoints( rdd: RDD[Vector] ) {
-    outputCanvas.setApproxPoints( rdd )
-  }
-
-}

From 18ac97940c5b40109aa38c43b2177e210d59c364 Mon Sep 17 00:00:00 2001
From: Bert Greevenbosch
Date: Mon, 15 Sep 2014 09:25:18 +0800
Subject: [PATCH 035/143] Update ArtificialNeuralNetwork.scala

Changed "X = X + 1" to "X += 1".
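
For orientation when reading the loops below: all network weights live in one
flat vector, laid out layer by layer, and these while loops compute per-layer
offsets into it. A small standalone sketch of that layout (the topology value
here is only an example, not one taken from this patch):

    // Layer l owns (topology(l - 1) + 1) * topology(l) weights; the extra
    // "+ 1" slot per node of layer l holds the bias weight.
    val topology = Array(2, 5, 1)
    val offsets = topology.indices.drop(1).scanLeft(0) { (ofs, l) =>
      ofs + (topology(l - 1) + 1) * topology(l)
    }
    // offsets == Vector(0, 15, 21): the weights of layer 1 start at index 0,
    // those of layer 2 at index 15, for 21 weights in total.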
--- .../mllib/ann/ArtificialNeuralNetwork.scala | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index 9294c46727f5c..bc7e3e4b6ceec 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -81,7 +81,7 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol while(l <= L) { tmp(l) = curPos curPos = curPos + (topology(l - 1) + 1) * (topology(l)) - l = l + 1 + l += 1 } tmp } @@ -96,7 +96,7 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol i = 0 while(i < topology(0)) { arrPrev(i) = arrData(i) - i = i + 1 + i += 1 } l = 1 while(l <= L) { @@ -108,15 +108,15 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol while( i < topology(l - 1) ) { cum = cum + arrPrev(i) * arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + i) - i = i + 1 + i += 1 } cum = cum + arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + topology(l - 1)) // bias arrCur(j) = g(cum) - j = j + 1 + j += 1 } arrPrev = arrCur - l = l + 1 + l += 1 } arrPrev } @@ -201,7 +201,6 @@ object ArtificialNeuralNetwork { train(input, model.topology, model.weights, numIterations, stepSize) } - def train( input: RDD[(Vector, Vector)], topology: Array[Int], @@ -222,23 +221,23 @@ object ArtificialNeuralNetwork { def randomWeights(topology: Array[Int]): Vector = { val rand = new XORShiftRandom() - + var i: Int = 0 var l: Int = 0 - + val noWeights = { var tmp = 0 var i = 1 while(i < topology.size) { tmp = tmp + topology(i) * (topology(i - 1) + 1) - i = i + 1 + i += 1 } tmp } val initialWeightsArr = new Array[Double](noWeights) var pos = 0; - + l = 1 while( l < topology.length) { i = 0 @@ -308,11 +307,11 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { val arrData = data.toArray val arrWeights = weights.toArray val arrNodes = new Array[Double](noNodes) - + var i: Int = 0 var j: Int = 0 var l: Int = 0 - + // forward run i = 0; while(i < topology(0)) { @@ -343,6 +342,7 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { arrDiff(j) = (arrNodes(ofsNode(L) + j) - arrData(topology(0) + j)) j += 1 } + var err: Double = 0; j = 0 while(j < topology(L)) { From daf137552110bc468b4f590770825e1a51783a0e Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Mon, 15 Sep 2014 09:26:05 +0800 Subject: [PATCH 036/143] Update ANNSuite.scala Added test for gradient. 
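
The new test perturbs individual weights by a small eps and compares the
numerical derivative of the squared error against the analytically computed
gradient. A minimal standalone sketch of that idea (the eval/grad parameters
are illustrative placeholders; the committed test drives the real model and
gradient classes directly, with the same eps = 1e-6 and acceptance threshold
1e-7):

    // Numerical gradient check: dE/dw_i ~ (E(w + eps * e_i) - E(w)) / eps.
    def checkGradient(
        eval: Array[Double] => Double,        // weights => error E(w)
        grad: Array[Double] => Array[Double], // analytic gradient of E at w
        w: Array[Double],
        eps: Double = 1e-6,
        accept: Double = 1e-7): Boolean = {
      val analytic = grad(w)
      w.indices.forall { i =>
        val wPlus = w.clone()
        wPlus(i) += eps
        val numeric = (eval(wPlus) - eval(w)) / eps
        math.abs(numeric - analytic(i)) < accept
      }
    }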
--- .../org/apache/spark/mllib/ann/ANNSuite.scala | 116 ++++++++++++++++-- 1 file changed, 106 insertions(+), 10 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala index 692366699a5fb..c6ef5b79ed2f6 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala @@ -19,20 +19,22 @@ package org.apache.spark.mllib.ann import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.util.LocalSparkContext +import org.apache.spark.util.random.XORShiftRandom import org.scalatest.FunSuite class ANNSuite extends FunSuite with LocalSparkContext { - private val inputs = Array[Array[Double]]( - Array[Double](0,0), - Array[Double](0,1), - Array[Double](1,0), - Array[Double](1,1) - ) - private val outputs = Array[Double](0, 1, 1, 0) - private val inputSize = 2 - private val hiddenSize = 5 - private val outputSize = 1 + test("ANN learns XOR function") { + val inputs = Array[Array[Double]]( + Array[Double](0,0), + Array[Double](0,1), + Array[Double](1,0), + Array[Double](1,1) + ) + val outputs = Array[Double](0, 1, 1, 0) + val inputSize = 2 + val hiddenSize = 5 + val outputSize = 1 val data = inputs.zip(outputs).map { case(features, label) => (Vectors.dense(features), Vectors.dense(Array(label)))} val rddData = sc.parallelize(data, 2) @@ -42,4 +44,98 @@ class ANNSuite extends FunSuite with LocalSparkContext { (model.predictV(input)(0), label(0)) }.collect() assert(predictionAndLabels.forall { case(p, l) => (math.round(p) - l) == 0 }) } + + test("Gradient of ANN") { + + val eps = 1e-6 + val accept = 1e-7 + + val topologyArr = Array[Array[Int]]( + Array[Int](1, 5, 1), + Array[Int](5, 10, 5, 3), + Array[Int](128, 256, 128) + ) + + val rnd = new XORShiftRandom(0) + + var cnt = 0 + while( cnt Date: Wed, 17 Sep 2014 16:09:15 +0400 Subject: [PATCH 037/143] minor style fixes --- .../mllib/ann/ArtificialNeuralNetwork.scala | 22 +++++++++---------- .../org/apache/spark/mllib/ann/ANNSuite.scala | 6 ++--- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index bc7e3e4b6ceec..aa0d42bedcbbf 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -80,7 +80,7 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol var l = 1 while(l <= L) { tmp(l) = curPos - curPos = curPos + (topology(l - 1) + 1) * (topology(l)) + curPos = curPos + (topology(l - 1) + 1) * topology(l) l += 1 } tmp @@ -236,14 +236,14 @@ object ArtificialNeuralNetwork { } val initialWeightsArr = new Array[Double](noWeights) - var pos = 0; + var pos = 0 l = 1 while( l < topology.length) { i = 0 while(i < (topology(l) * (topology(l - 1) + 1))) { initialWeightsArr(pos) = (rand.nextDouble * 4.8 - 2.4) / (topology(l - 1) + 1) - pos += 1; + pos += 1 i += 1 } l += 1 @@ -271,12 +271,12 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { val ofsWeight: Array[Int] = { val tmp = new Array[Int](L + 1) - var curPos = 0; - tmp(0) = 0; + var curPos = 0 + tmp(0) = 0 var l = 1 while(l <= L) { tmp(l) = curPos - curPos = curPos + (topology(l - 1) + 1) * (topology(l)) + curPos = curPos + (topology(l - 1) + 1) * topology(l) l += 1 } tmp @@ 
-313,7 +313,7 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { var l: Int = 0 // forward run - i = 0; + i = 0 while(i < topology(0)) { arrNodes(i) = arrData(i) i += 1 @@ -322,7 +322,7 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { while( l <= L ) { j = 0 while(j < topology(l)) { - var cum: Double = 0.0; + var cum: Double = 0.0 i = 0 while(i < topology(l - 1)) { cum = cum + @@ -339,11 +339,11 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { val arrDiff = new Array[Double](topology(L)) j = 0 while( j < topology(L)) { - arrDiff(j) = (arrNodes(ofsNode(L) + j) - arrData(topology(0) + j)) + arrDiff(j) = arrNodes(ofsNode(L) + j) - arrData(topology(0) + j) j += 1 } - var err: Double = 0; + var err: Double = 0 j = 0 while(j < topology(L)) { err = err + arrDiff(j) * arrDiff(j) @@ -406,7 +406,7 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { cumGradient: Vector): Double = { val (grad, err) = compute(data, label, weights) cumGradient.toBreeze += grad.toBreeze - return err + err } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala index c6ef5b79ed2f6..ddf3ff9009ec4 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala @@ -70,7 +70,7 @@ class ANNSuite extends FunSuite with LocalSparkContext { var l = 1 while(l <= L) { - noWeights += (topology(l - 1) + 1)*(topology(l)) + noWeights += (topology(l - 1) + 1) * topology(l) l += 1 } @@ -120,8 +120,8 @@ class ANNSuite extends FunSuite with LocalSparkContext { val annModel2 = new ArtificialNeuralNetworkModel(tmpWeights, topology) val brzO2 = annModel2.predictV(data).toBreeze - val E1 = .5*((brzO1 - brzOut).dot(brzO1 - brzOut)) - val E2 = .5*((brzO2 - brzOut).dot(brzO2 - brzOut)) + val E1 = .5* (brzO1 - brzOut).dot(brzO1 - brzOut) + val E2 = .5* (brzO2 - brzOut).dot(brzO2 - brzOut) val dEdW = ( E2 - E1 ) / eps val gradw = gradient(w) From 6c657c30e17546e6602f234bcef62c0a7a1c1efe Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Wed, 17 Sep 2014 18:11:20 +0400 Subject: [PATCH 038/143] Forward propagation code sharing --- .../mllib/ann/ArtificialNeuralNetwork.scala | 91 ++++++------------- 1 file changed, 27 insertions(+), 64 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index aa0d42bedcbbf..ac13119fd9d40 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -69,56 +69,11 @@ import org.apache.spark.util.random.XORShiftRandom */ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topology: Array[Int]) - extends Serializable { - - private val L = topology.length - 1 - - private val ofsWeight: Array[Int] = { - val tmp = new Array[Int](L + 1) - var curPos = 0 - tmp(0) = 0 - var l = 1 - while(l <= L) { - tmp(l) = curPos - curPos = curPos + (topology(l - 1) + 1) * topology(l) - l += 1 - } - tmp - } - - private def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) + extends Serializable with ANNHelper { def computeValues(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { - var arrPrev = new Array[Double](topology(0)) - var i: Int = 0 - var j: Int = 0 - var l: Int = 
0 - i = 0 - while(i < topology(0)) { - arrPrev(i) = arrData(i) - i += 1 - } - l = 1 - while(l <= L) { - val arrCur = new Array[Double](topology(l)) - j = 0 - while(j < topology(l)) { - var cum: Double = 0.0 - i = 0 - while( i < topology(l - 1) ) { - cum = cum + - arrPrev(i) * arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + i) - i += 1 - } - cum = cum + - arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + topology(l - 1)) // bias - arrCur(j) = g(cum) - j += 1 - } - arrPrev = arrCur - l += 1 - } - arrPrev + val arrNodes = forwardRun(arrData, arrWeights) + arrNodes.slice(arrNodes.size - topology(L), arrNodes.size) } def predictPoint(data: Vector, weights: Vector): Double = { @@ -250,16 +205,14 @@ object ArtificialNeuralNetwork { } Vectors.dense(initialWeightsArr) } - } -private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { - - private def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) +private[ann] trait ANNHelper { + protected val topology: Array[Int] + protected def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) + protected val L = topology.length - 1 - private val L = topology.length - 1 - - private val noWeights = { + protected val noWeights = { var tmp = 0 var l = 1 while(l <= L) { @@ -269,7 +222,7 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { tmp } - val ofsWeight: Array[Int] = { + protected val ofsWeight: Array[Int] = { val tmp = new Array[Int](L + 1) var curPos = 0 tmp(0) = 0 @@ -282,7 +235,7 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { tmp } - val noNodes: Int = { + protected val noNodes: Int = { var tmp: Integer = 0 var l = 0 while(l < topology.size) { @@ -292,7 +245,7 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { tmp } - val ofsNode: Array[Int] = { + protected val ofsNode: Array[Int] = { val tmp = new Array[Int](L + 1) tmp(0) = 0 var l = 1 @@ -303,16 +256,11 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { tmp } - override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { - val arrData = data.toArray - val arrWeights = weights.toArray + protected def forwardRun(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { val arrNodes = new Array[Double](noNodes) - var i: Int = 0 var j: Int = 0 var l: Int = 0 - - // forward run i = 0 while(i < topology(0)) { arrNodes(i) = arrData(i) @@ -336,6 +284,21 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { } l += 1 } + arrNodes + } +} + +private class ANNLeastSquaresGradient(val topology: Array[Int]) extends Gradient with ANNHelper { + + override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { + val arrData = data.toArray + val arrWeights = weights.toArray + + var i: Int = 0 + var j: Int = 0 + var l: Int = 0 + // forward run + val arrNodes = forwardRun(arrData, arrWeights) val arrDiff = new Array[Double](topology(L)) j = 0 while( j < topology(L)) { From 5ab026356e000f1b541fead48b5f6b8f9e1d1134 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Mon, 22 Sep 2014 16:22:27 +0800 Subject: [PATCH 039/143] Update ArtificialNeuralNetwork.scala Changed optimiser to LBFGS --- .../mllib/ann/ArtificialNeuralNetwork.scala | 192 ++++++++++-------- 1 file changed, 112 insertions(+), 80 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index 
ac13119fd9d40..fe200f8c1aae2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -69,52 +69,86 @@ import org.apache.spark.util.random.XORShiftRandom */ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topology: Array[Int]) - extends Serializable with ANNHelper { + extends Serializable { - def computeValues(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { - val arrNodes = forwardRun(arrData, arrWeights) - arrNodes.slice(arrNodes.size - topology(L), arrNodes.size) - } + private val L = topology.length - 1 - def predictPoint(data: Vector, weights: Vector): Double = { - val outp = computeValues(data.toArray, weights.toArray) - outp(0) + private val ofsWeight: Array[Int] = { + val tmp = new Array[Int](L + 1) + var curPos = 0 + tmp(0) = 0 + var l = 1 + while(l <= L) { + tmp(l) = curPos + curPos = curPos + (topology(l - 1) + 1) * (topology(l)) + l += 1 + } + tmp } - def predictPointV(data: Vector, weights: Vector): Vector = { - Vectors.dense(computeValues(data.toArray, weights.toArray)) + private def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) + + def computeValues(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { + var arrPrev = new Array[Double](topology(0)) + var i: Int = 0 + var j: Int = 0 + var l: Int = 0 + i = 0 + while(i < topology(0)) { + arrPrev(i) = arrData(i) + i += 1 + } + l = 1 + while(l <= L) { + val arrCur = new Array[Double](topology(l)) + j = 0 + while(j < topology(l)) { + var cum: Double = 0.0 + i = 0 + while( i < topology(l - 1) ) { + cum = cum + + arrPrev(i) * arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + i) + i += 1 + } + cum = cum + + arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + topology(l - 1)) // bias + arrCur(j) = g(cum) + j += 1 + } + arrPrev = arrCur + l += 1 + } + arrPrev } /** * Predict values for a single data point using the model trained. * - * @param testData array representing a single data point + * @param testData Vector representing a single data point * @return Vector prediction from the trained model * * Returns the complete vector. */ def predictV(testData: Vector): Vector = { - predictPointV(testData, weights) + Vectors.dense(computeValues(testData.toArray, weights.toArray)) } } class ArtificialNeuralNetwork private( - private var topology: Array[Int], - private var numIterations: Int, - private var stepSize: Double, - private var miniBatchFraction: Double) + topology: Array[Int], + maxNumIterations: Int, + convergenceTol: Double) extends Serializable { - private val gradient = new ANNLeastSquaresGradient(topology) - private val updater = new ANNUpdater() - private val optimizer = new GradientDescent(gradient, updater) - .setStepSize(stepSize) - .setNumIterations(numIterations) - .setMiniBatchFraction(miniBatchFraction) + private var gradient: Gradient = new ANNLeastSquaresGradient(topology) + private var updater: Updater = new ANNUpdater() + private var optimizer: Optimizer = new LBFGS(gradient, updater). + setConvergenceTol( convergenceTol ). 
+ setMaxNumIterations(maxNumIterations) private def run(input: RDD[(Vector, Vector)], initialWeights: Vector): - ArtificialNeuralNetworkModel = { + ArtificialNeuralNetworkModel = { val data = input.map(v => (0.0, Vectors.fromBreeze(DenseVector.vertcat( @@ -124,54 +158,60 @@ class ArtificialNeuralNetwork private( val weights = optimizer.optimize(data, initialWeights) new ArtificialNeuralNetworkModel(weights, topology) } + } object ArtificialNeuralNetwork { + var optimizer: Optimizer = null; + def train( input: RDD[(Vector, Vector)], topology: Array[Int], initialWeights: Vector, - numIterations: Int, - stepSize: Double, - miniBatchFraction: Double): ArtificialNeuralNetworkModel = { - new ArtificialNeuralNetwork(topology, numIterations, stepSize, miniBatchFraction) + maxNumIterations: Int): ArtificialNeuralNetworkModel = { + new ArtificialNeuralNetwork(topology, maxNumIterations, 1e-4) .run(input, initialWeights) } def train( - input: RDD[(Vector, Vector)], - topology: Array[Int], - initialWeights: Vector, - numIterations: Int, - stepSize: Double): ArtificialNeuralNetworkModel = { - new ArtificialNeuralNetwork(topology, numIterations, stepSize, 1.0).run(input, initialWeights) + input: RDD[(Vector,Vector)], + model: ArtificialNeuralNetworkModel, + maxNumIterations: Int): ArtificialNeuralNetworkModel = { + train(input, model.topology, model.weights, maxNumIterations) } def train( - input: RDD[(Vector,Vector)], - model: ArtificialNeuralNetworkModel, - numIterations: Int, - stepSize: Double): ArtificialNeuralNetworkModel = { - train(input, model.topology, model.weights, numIterations, stepSize) + input: RDD[(Vector, Vector)], + topology: Array[Int], + maxNumIterations: Int): ArtificialNeuralNetworkModel = { + train(input, topology, randomWeights(topology), maxNumIterations) } def train( input: RDD[(Vector, Vector)], topology: Array[Int], - numIterations: Int, - stepSize: Double, - miniBatchFraction: Double): ArtificialNeuralNetworkModel = { - new ArtificialNeuralNetwork(topology, numIterations, stepSize, miniBatchFraction) - .run(input, randomWeights(topology)) + initialWeights: Vector, + maxNumIterations: Int, + convergenceTol: Double): ArtificialNeuralNetworkModel = { + new ArtificialNeuralNetwork(topology, maxNumIterations, convergenceTol) + .run(input, initialWeights) + } + + def train( + input: RDD[(Vector,Vector)], + model: ArtificialNeuralNetworkModel, + maxNumIterations: Int, + convergenceTol: Double): ArtificialNeuralNetworkModel = { + train(input, model.topology, model.weights, maxNumIterations, convergenceTol) } def train( input: RDD[(Vector, Vector)], topology: Array[Int], - numIterations: Int, - stepSize: Double): ArtificialNeuralNetworkModel = { - train(input, topology, numIterations, stepSize, 1.0) + maxNumIterations: Int, + convergenceTol: Double): ArtificialNeuralNetworkModel = { + train(input, topology, randomWeights(topology), maxNumIterations, convergenceTol) } def randomWeights(topology: Array[Int]): Vector = { @@ -191,28 +231,30 @@ object ArtificialNeuralNetwork { } val initialWeightsArr = new Array[Double](noWeights) - var pos = 0 + var pos = 0; l = 1 while( l < topology.length) { i = 0 while(i < (topology(l) * (topology(l - 1) + 1))) { initialWeightsArr(pos) = (rand.nextDouble * 4.8 - 2.4) / (topology(l - 1) + 1) - pos += 1 + pos += 1; i += 1 } l += 1 } Vectors.dense(initialWeightsArr) } + } -private[ann] trait ANNHelper { - protected val topology: Array[Int] - protected def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) - protected val L = topology.length - 1 +private class 
ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { - protected val noWeights = { + private def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) + + private val L = topology.length - 1 + + private val noWeights = { var tmp = 0 var l = 1 while(l <= L) { @@ -222,20 +264,20 @@ private[ann] trait ANNHelper { tmp } - protected val ofsWeight: Array[Int] = { + val ofsWeight: Array[Int] = { val tmp = new Array[Int](L + 1) - var curPos = 0 - tmp(0) = 0 + var curPos = 0; + tmp(0) = 0; var l = 1 while(l <= L) { tmp(l) = curPos - curPos = curPos + (topology(l - 1) + 1) * topology(l) + curPos = curPos + (topology(l - 1) + 1) * (topology(l)) l += 1 } tmp } - protected val noNodes: Int = { + val noNodes: Int = { var tmp: Integer = 0 var l = 0 while(l < topology.size) { @@ -245,7 +287,7 @@ private[ann] trait ANNHelper { tmp } - protected val ofsNode: Array[Int] = { + val ofsNode: Array[Int] = { val tmp = new Array[Int](L + 1) tmp(0) = 0 var l = 1 @@ -256,12 +298,17 @@ private[ann] trait ANNHelper { tmp } - protected def forwardRun(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { + override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { + val arrData = data.toArray + val arrWeights = weights.toArray val arrNodes = new Array[Double](noNodes) + var i: Int = 0 var j: Int = 0 var l: Int = 0 - i = 0 + + // forward run + i = 0; while(i < topology(0)) { arrNodes(i) = arrData(i) i += 1 @@ -270,7 +317,7 @@ private[ann] trait ANNHelper { while( l <= L ) { j = 0 while(j < topology(l)) { - var cum: Double = 0.0 + var cum: Double = 0.0; i = 0 while(i < topology(l - 1)) { cum = cum + @@ -284,29 +331,14 @@ private[ann] trait ANNHelper { } l += 1 } - arrNodes - } -} - -private class ANNLeastSquaresGradient(val topology: Array[Int]) extends Gradient with ANNHelper { - - override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { - val arrData = data.toArray - val arrWeights = weights.toArray - - var i: Int = 0 - var j: Int = 0 - var l: Int = 0 - // forward run - val arrNodes = forwardRun(arrData, arrWeights) val arrDiff = new Array[Double](topology(L)) j = 0 while( j < topology(L)) { - arrDiff(j) = arrNodes(ofsNode(L) + j) - arrData(topology(0) + j) + arrDiff(j) = (arrNodes(ofsNode(L) + j) - arrData(topology(0) + j)) j += 1 } - var err: Double = 0 + var err: Double = 0; j = 0 while(j < topology(L)) { err = err + arrDiff(j) * arrDiff(j) @@ -369,7 +401,7 @@ private class ANNLeastSquaresGradient(val topology: Array[Int]) extends Gradient cumGradient: Vector): Double = { val (grad, err) = compute(data, label, weights) cumGradient.toBreeze += grad.toBreeze - err + return err } } From 577a13aa52e1d81799ec6765e6921391493efcf5 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Mon, 22 Sep 2014 16:23:51 +0800 Subject: [PATCH 040/143] Update ANNSuite.scala Update due to optimiser change. 
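
The XOR test below switches to the new signature. For comparison, a usage
sketch (rddData stands for the test's RDD[(Vector, Vector)] of input/output
pairs, built a few lines above the changed call):

    // SGD took (numIterations, stepSize, miniBatchFraction); with LBFGS the
    // tuning knobs are a maximum iteration count and a convergence tolerance.
    val topology = Array[Int](2, 5, 1)  // input, hidden and output layer sizes
    val model = ArtificialNeuralNetwork.train(rddData, topology, 100, 1e-5)
    val prediction = model.predictV(Vectors.dense(0.0, 1.0))  // ~1.0 for XOR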
--- mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala
index ddf3ff9009ec4..76d0e3e9da7be 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala
@@ -39,7 +39,7 @@ class ANNSuite extends FunSuite with LocalSparkContext {
         (Vectors.dense(features), Vectors.dense(Array(label)))}
     val rddData = sc.parallelize(data, 2)
     val topology = Array[Int](inputSize, hiddenSize, outputSize)
-    val model = ArtificialNeuralNetwork.train(rddData, topology, 2000, 2.0, 1.0)
+    val model = ArtificialNeuralNetwork.train(rddData, topology, 100, 1e-5)
     val predictionAndLabels = rddData.map { case(input, label) =>
       (model.predictV(input)(0), label(0)) }.collect()
     assert(predictionAndLabels.forall { case(p, l) => (math.round(p) - l) == 0 })

From d048878ab69204bf2bfb168da88df7c4ab9d6898 Mon Sep 17 00:00:00 2001
From: Bert Greevenbosch
Date: Mon, 22 Sep 2014 16:25:08 +0800
Subject: [PATCH 041/143] Delete TestANN.scala

The current version makes no sense with the fast LBFGS algorithm; an adapted
version has been moved to the examples.
---
 .../org/apache/spark/mllib/ann/TestANN.scala  | 581 ------------------
 1 file changed, 581 deletions(-)
 delete mode 100644 mllib/src/test/scala/org/apache/spark/mllib/ann/TestANN.scala

diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestANN.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestANN.scala
deleted file mode 100644
index 9e6f59df3a11e..0000000000000
--- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestANN.scala
+++ /dev/null
@@ -1,581 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - - -package org.apache.spark.mllib.ann - -import java.awt._ -import java.awt.event._ -import java.text.SimpleDateFormat -import java.util.Calendar -import org.apache.spark._ -import org.apache.spark.mllib.ann._ -import org.apache.spark.mllib.linalg._ -import org.apache.spark.mllib.regression._ -import org.apache.spark.rdd.RDD -import scala.Array.canBuildFrom -import scala.util.Random - -object windowAdapter extends WindowAdapter { - - override def windowClosing( e: WindowEvent ) { - System.exit(0) - } - -} - -class OutputCanvas2D( wd: Int, ht: Int ) extends Canvas { - - var points: Array[Vector] = null - var approxPoints: Array[Vector] = null - - /* input: rdd of (x,y) vectors */ - def setData( rdd: RDD[Vector] ) { - points = rdd.collect - repaint - } - - def setApproxPoints( rdd: RDD[Vector] ) { - approxPoints = rdd.collect - repaint - } - - def plotDot( g: Graphics, x: Int, y: Int ) { - val r = 5 - val noSamp = 6*r - var x1 = x - var y1 = y + r - for( j <- 1 to noSamp ) { - val x2 = (x.toDouble + math.sin( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt - val y2 = (y.toDouble + math.cos( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt - g.drawLine( x1, ht - y1, x2, ht - y2 ) - x1 = x2 - y1 = y2 - } - } - - override def paint( g: Graphics) = { - - var xmax: Double = 0.0 - var xmin: Double = 0.0 - var ymax: Double = 0.0 - var ymin: Double = 0.0 - - if( points!=null ) { - - g.setColor( Color.black ) - val x = points.map( T => (T.toArray)(0) ) - val y = points.map( T => (T.toArray)(1) ) - - xmax = x.max - xmin = x.min - ymax = y.max - ymin = y.min - - for( i <- 0 to x.size - 1 ) { - - val xr = (((x(i).toDouble - xmin)/(xmax - xmin))*wd + .5).toInt - val yr = (((y(i).toDouble - ymin)/(ymax - ymin))*ht + .5).toInt - plotDot( g, xr, yr ) - - } - - if( approxPoints != null ) { - - g.setColor( Color.red ) - val x = approxPoints.map( T => (T.toArray)(0) ) - val y = approxPoints.map( T => (T.toArray)(1) ) - - for( i <- 0 to x.size-1 ) { - val xr = (((x(i).toDouble - xmin)/(xmax - xmin))*wd + .5).toInt - val yr = (((y(i).toDouble - ymin)/(ymax - ymin))*ht + .5).toInt - plotDot( g, xr, yr ) - } - - } - - } - - } - -} - -class OutputFrame2D( title: String ) extends Frame( title ) { - - val wd = 800 - val ht = 600 - - var outputCanvas = new OutputCanvas2D( wd, ht ) - - def apply() { - addWindowListener( windowAdapter ) - setSize( wd, ht ) - add( "Center", outputCanvas ) - show() - } - - def setData( rdd: RDD[Vector] ) { - outputCanvas.setData( rdd ) - } - - def setApproxPoints( rdd: RDD[Vector] ) { - outputCanvas.setApproxPoints( rdd ) - } - - -} - -object windowAdapter3D extends WindowAdapter { - - override def windowClosing( e: WindowEvent ) { - System.exit(0) - } - -} - -class OutputCanvas3D( wd: Int, ht: Int, shadowFrac: Double ) extends Canvas { - - var angle: Double = 0 - var points: Array[Vector] = null - var approxPoints: Array[Vector] = null - - /* 3 dimensional (x,y,z) vector */ - def setData( rdd: RDD[Vector] ) { - points = rdd.collect - repaint - } - - def setApproxPoints( rdd: RDD[Vector] ) { - approxPoints = rdd.collect - repaint - } - - def plotDot( g: Graphics, x: Int, y: Int ) { - val r = 5 - val noSamp = 6*r - var x1 = x - var y1 = y + r - for( j <- 1 to noSamp ) { - val x2 = (x.toDouble + math.sin( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt - val y2 = (y.toDouble + math.cos( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt - g.drawLine( x1, ht - y1, x2, ht - y2 ) - x1 = x2 - y1 = y2 - } - } - - def plotLine( g: Graphics, x1: Int, y1: Int, x2: Int, y2: Int ) { - g.drawLine( x1, ht - y1, x2, 
ht - y2 )
-  }
-
-  def calcCord( arr: Array[Double], angle: Double ): (Double, Double, Double, Double, Double, Double) = {
-
-    var arrOut = new Array[Double](6)
-
-    val x = arr(0)*math.cos( angle ) - arr(1)*math.sin( angle )
-    val y = arr(0)*math.sin( angle ) + arr(1)*math.cos( angle )
-    val z = arr(2)
-
-    val x0 = arr(0)*math.cos( angle ) - arr(1)*math.sin( angle )
-    val y0 = arr(0)*math.sin( angle ) + arr(1)*math.cos( angle )
-    val z0 = 0
-
-    val xs = (arr(0) + shadowFrac*arr(2))*math.cos( angle ) - arr(1)*math.sin( angle )
-    val ys = (arr(0) + shadowFrac*arr(2))*math.sin( angle ) + arr(1)*math.cos( angle )
-    val zs = 0
-
-    arrOut(0) = y - .5*x
-    arrOut(1) = z - .25*x
-
-    arrOut(2) = y0 - .5*x0
-    arrOut(3) = z0 - .25*x0
-
-    arrOut(4) = ys - .5*xs
-    arrOut(5) = zs - .25*xs
-
-    ( arrOut(0), arrOut(1), arrOut(2), arrOut(3), arrOut(4), arrOut(5) )
-
-  }
-
-  override def paint( g: Graphics) = {
-
-    if( points!=null ) {
-
-      var p = points.map( T => calcCord( T.toArray, angle ) ).toArray
-
-      var xmax = p(0)._1
-      var xmin = p(0)._1
-      var ymax = p(0)._2
-      var ymin = p(0)._2
-
-      for( i <- 0 to p.size-1 ) {
-
-        if( xmax<p(i)._1 ) {
-          xmax = p(i)._1
-        }
-        if( xmax<p(i)._3 ) {
-          xmax = p(i)._3
-        }
-        if( xmax<p(i)._5 ) {
-          xmax = p(i)._5
-        }
-        if( xmin>p(i)._1 ) {
-          xmin = p(i)._1
-        }
-        if( xmin>p(i)._3 ) {
-          xmin = p(i)._3
-        }
-        if( xmin>p(i)._5 ) {
-          xmin = p(i)._5
-        }
-
-        if( ymax<p(i)._2 ) {
-          ymax = p(i)._2
-        }
-        if( ymax<p(i)._4 ) {
-          ymax = p(i)._4
-        }
-        if( ymax<p(i)._6 ) {
-          ymax = p(i)._6
-        }
-        if( ymin>p(i)._2 ) {
-          ymin = p(i)._2
-        }
-        if( ymin>p(i)._4 ) {
-          ymin = p(i)._4
-        }
-        if( ymin>p(i)._6 ) {
-          ymin = p(i)._6
-        }
-
-      }
-
-      for( i <- 0 to p.size-1 ) {
-
-        var x_ = (((p(i)._1 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt
-        var y_ = (((p(i)._2 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt
-        var x0 = (((p(i)._3 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt
-        var y0 = (((p(i)._4 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt
-        var xs = (((p(i)._5 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt
-        var ys = (((p(i)._6 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt
-
-        g.setColor( Color.black )
-        plotDot( g, x_, y_ )
-        plotLine( g, x_, y_, x0, y0 )
-        g.setColor( Color.gray )
-        plotLine( g, x0, y0, xs, ys )
-
-      }
-
-      if( approxPoints != null ) {
-
-        var p = approxPoints.map( T => calcCord( T.toArray, angle ) )
-
-        for( i <- 0 to p.size-1 ) {
-
-          var x_ = (((p(i)._1 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt
-          var y_ = (((p(i)._2 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt
-          var x0 = (((p(i)._3 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt
-          var y0 = (((p(i)._4 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt
-          var xs = (((p(i)._5 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt
-          var ys = (((p(i)._6 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt
-
-          g.setColor( Color.red )
-          plotDot( g, x_, y_ )
-          plotLine( g, x_, y_, x0, y0 )
-          g.setColor( Color.magenta )
-          plotLine( g, x0, y0, xs, ys )
-
-        }
-
-      }
-
-    }
-  }
-}
-
-class OutputFrame3D( title: String, shadowFrac: Double ) extends Frame( title ) {
-
-  val wd = 800
-  val ht = 600
-
-  def this( title: String ) = this( title, .25 )
-
-  var outputCanvas = new OutputCanvas3D( wd, ht, shadowFrac )
-
-  def apply() {
-    addWindowListener( windowAdapter3D )
-    setSize( wd, ht )
-    add( "Center", outputCanvas )
-    show()
-  }
-
-  def setData( rdd: RDD[Vector] ) {
-    outputCanvas.setData( rdd )
-  }
-
-  def setAngle( angle: Double ) {
-    outputCanvas.angle = angle
-  }
-
-  def setApproxPoints( rdd: RDD[Vector] ) {
-    outputCanvas.setApproxPoints( rdd )
-  }
-
-}
-
-object TestANN {
-
-  var rand = new Random( 0 )
-
-  def generateInput2D( f: Double => Double, xmin: Double, xmax: Double, noPoints: Int ): Array[(Vector,Vector)] =
-  {
-
-    var out = new Array[(Vector,Vector)](noPoints)
-
-    for( i <- 0 to noPoints - 1 ) {
-      val 
x = xmin + rand.nextDouble()*(xmax - xmin) - val y = f(x) - out(i) = ( Vectors.dense( x ), Vectors.dense( y ) ) - } - - return out - - } - - - def generateInput3D( f: (Double,Double) => Double, xmin: Double, xmax: Double, ymin: Double, ymax: Double, noPoints: Int ): Array[(Vector,Vector)] = { - - var out = new Array[(Vector,Vector)](noPoints) - - for( i <- 0 to noPoints - 1 ) { - - val x = xmin + rand.nextDouble()*(xmax - xmin) - val y = ymin + rand.nextDouble()*(ymax - ymin) - val z = f( x, y ) - - var arr = new Array[Double](2) - - arr(0) = x - arr(1) = y - out(i) = ( Vectors.dense( arr ), Vectors.dense( z ) ) - - } - - out - - } - - def generateInput4D( f: Double => (Double,Double,Double), tmin: Double, tmax: Double, noPoints: Int ): Array[(Vector,Vector)] = { - - var out = new Array[(Vector,Vector)](noPoints) - - for( i <- 0 to noPoints - 1 ) { - - val t: Double = tmin + rand.nextDouble()*(tmax - tmin) - var arr = new Array[Double](3) - var F = f(t) - - arr(0) = F._1 - arr(1) = F._2 - arr(2) = F._3 - - out(i) = ( Vectors.dense( t ), Vectors.dense( arr ) ) - } - - out - - } - - def f( T: Double ): Double = { - val y = 0.5 + Math.abs(T/5).toInt.toDouble*.15 + math.sin(T*math.Pi/10)*.1 - assert( y<= 1) - y - } - - def f3D( x: Double, y: Double ): Double = { - .5 + .24*Math.sin( x*2*math.Pi/10 ) + .24*Math.cos( y*2*math.Pi/10 ) - } - - def f4D( t: Double ): (Double, Double,Double) = { - val x = Math.abs(.8*Math.cos( t*2*math.Pi/20 ) ) + .1 - val y = (11 + t)/22 - val z = .5 + .35*Math.sin(t*2*math.Pi/5)*Math.cos( t*2*math.Pi/10 ) + .15*t/11 - ( x, y, z ) - } - - def concat( v1: Vector, v2: Vector ): Vector = { - - var a1 = v1.toArray - var a2 = v2.toArray - var a3 = new Array[Double]( a1.size + a2.size ) - - for( i <- 0 to a1.size - 1 ) { - a3(i) = a1(i) - } - - for( i <- 0 to a2.size - 1 ) { - a3(i + a1.size) = a2(i) - } - - Vectors.dense( a3 ) - - } - - def main( arg: Array[String] ) { - - println( "ANN tester" ) - println - - val formatter = new SimpleDateFormat("hh:mm:ss") - - var curAngle: Double = 0.0 - var graphic: Boolean = false - - if( (arg.length>0) && (arg(0)=="graph" ) ) { - graphic = true - } - - var outputFrame2D: OutputFrame2D = null - var outputFrame3D: OutputFrame3D = null - var outputFrame4D: OutputFrame3D = null - - if( graphic ) { - - outputFrame2D = new OutputFrame2D( "x -> y" ) - outputFrame2D.apply - - outputFrame3D = new OutputFrame3D( "(x,y) -> z", 1 ) - outputFrame3D.apply - - outputFrame4D = new OutputFrame3D( "t -> (x,y,z)" ) - outputFrame4D.apply - - } - - var A = 20.0 - var B = 50.0 - - var conf = new SparkConf().setAppName("Parallel ANN").setMaster("local[1]") - var sc = new SparkContext(conf) - - val testRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache - val testRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 200 ), 2).cache - val testRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache - - val validationRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache - val validationRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache - val validationRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache - - if( graphic ) { - - outputFrame2D.setData( testRDD2D.map( T => concat( T._1, T._2 ) ) ) - outputFrame3D.setData( testRDD3D.map( T => concat( T._1, T._2 ) ) ) - outputFrame4D.setData( testRDD4D.map( T => T._2 ) ) - - } - - val starttime = Calendar.getInstance().getTime() - println( "Start 
training " + starttime ) - - var model2D = ArtificialNeuralNetwork.train( testRDD2D, Array[Int](1, 3, 3, 1), 1000, 1.0) - var model3D = ArtificialNeuralNetwork.train( testRDD3D, Array[Int](2, 20, 1), 1000, 1.0) - var model4D = ArtificialNeuralNetwork.train( testRDD4D, Array[Int](1, 20, 3), 1000, 1.0 ) - - val noIt = 1500 - var errHist = new Array[(Int,Double,Double,Double)]( noIt ) - - for( i <- 0 to noIt - 1 ) { - - val predictedAndTarget2D = validationRDD2D.map( T => ( T._1, T._2, model2D.predictV( T._1 ) ) ) - val predictedAndTarget3D = validationRDD3D.map( T => ( T._1, T._2, model3D.predictV( T._1 ) ) ) - val predictedAndTarget4D = validationRDD4D.map( T => ( T._1, T._2, model4D.predictV( T._1 ) ) ) - - var err2D = predictedAndTarget2D.map( T => - (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) - ).reduce( (u,v) => u + v ) - - var err3D = predictedAndTarget3D.map( T => - (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) - ).reduce( (u,v) => u + v ) - - var err4D = predictedAndTarget4D.map( T => { - - val v1 = T._2.toArray - val v2 = T._3.toArray - - (v1(0) - v2(0))*(v1(0) - v2(0)) + - (v1(1) - v2(1))*(v1(1) - v2(1)) + - (v1(2) - v2(2))*(v1(2) - v2(2)) - - } ).reduce( (u,v) => u + v ) - - - if( graphic ) { - - val predicted2D = predictedAndTarget2D.map( - T => concat( T._1, T._3 ) - ) - - val predicted3D = predictedAndTarget3D.map( - T => concat( T._1, T._3 ) - ) - - val predicted4D = predictedAndTarget4D.map( - T => T._3 - ) - - curAngle = curAngle + math.Pi/4 - if( curAngle>=2*math.Pi ) { - curAngle = curAngle - 2*math.Pi - } - - outputFrame3D.setAngle( curAngle ) - outputFrame4D.setAngle( curAngle ) - - outputFrame2D.setApproxPoints( predicted2D ) - outputFrame3D.setApproxPoints( predicted3D ) - outputFrame4D.setApproxPoints( predicted4D ) - - } - - println( "It. "+i+" ("+Calendar.getInstance().getTime()+"), Error 2D/3D/4D: " + (err2D, err3D, err4D) ) - errHist(i) = ( i, err2D, err3D, err4D ) - - if( i < noIt - 1 ) { - model2D = ArtificialNeuralNetwork.train(testRDD2D, model2D, 1000, 1.0) - model3D = ArtificialNeuralNetwork.train(testRDD3D, model3D, 1000, 1.0) - model4D = ArtificialNeuralNetwork.train(testRDD4D, model4D, 1000, 1.0) - } - - } - - sc.stop - - val stoptime = Calendar.getInstance().getTime() - - for( i <- 0 to noIt - 1 ) { - println( errHist(i) ) - } - - println( formatter.format( starttime )+"-" + formatter.format( stoptime ) + " "+(stoptime.getTime-starttime.getTime+500)/1000+" seconds" ) - - } - -} From 90195fa65cf240a7765fd990b5ebb40e0abb1ca0 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Mon, 22 Sep 2014 16:27:35 +0800 Subject: [PATCH 042/143] Create ANNDemo.scala Demo of ANN with LBFGS. Can consider whether we want to keep this, as there is much overhead due to the graphical representation. --- .../org/apache/spark/examples/ANNDemo.scala | 576 ++++++++++++++++++ 1 file changed, 576 insertions(+) create mode 100644 examples/src/main/scala/org/apache/spark/examples/ANNDemo.scala diff --git a/examples/src/main/scala/org/apache/spark/examples/ANNDemo.scala b/examples/src/main/scala/org/apache/spark/examples/ANNDemo.scala new file mode 100644 index 0000000000000..14e9a2d61cb10 --- /dev/null +++ b/examples/src/main/scala/org/apache/spark/examples/ANNDemo.scala @@ -0,0 +1,576 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.examples.mllib + +import java.awt._ +import java.awt.event._ +import java.text.SimpleDateFormat +import java.util.Calendar +import org.apache.spark._ +import org.apache.spark.mllib.ann._ +import org.apache.spark.mllib.linalg._ +import org.apache.spark.mllib.regression._ +import org.apache.spark.rdd.RDD +import scala.Array.canBuildFrom +import scala.util.Random + +object windowAdapter extends WindowAdapter { + + override def windowClosing(e: WindowEvent) { + System.exit(0) + } + +} + +class OutputCanvas2D(wd: Int, ht: Int) extends Canvas { + + var points: Array[Vector] = null + var approxPoints: Array[Vector] = null + + /* input: rdd of (x,y) vectors */ + def setData(rdd: RDD[Vector]) { + points = rdd.collect + repaint + } + + def setApproxPoints(rdd: RDD[Vector]) { + approxPoints = rdd.collect + repaint + } + + def plotDot(g: Graphics, x: Int, y: Int) { + val r = 5 + val noSamp = 6*r + var x1 = x + var y1 = y + r + for(j <- 1 to noSamp) { + val x2 = (x.toDouble + math.sin(j.toDouble*2*math.Pi/noSamp)*r + .5).toInt + val y2 = (y.toDouble + math.cos(j.toDouble*2*math.Pi/noSamp)*r + .5).toInt + g.drawLine(x1, ht - y1, x2, ht - y2) + x1 = x2 + y1 = y2 + } + } + + override def paint(g: Graphics) = { + + var xmax: Double = 0.0 + var xmin: Double = 0.0 + var ymax: Double = 0.0 + var ymin: Double = 0.0 + + if(points!=null) { + + g.setColor(Color.black) + val x = points.map(T => (T.toArray)(0)) + val y = points.map(T => (T.toArray)(1)) + + xmax = x.max + xmin = x.min + ymax = y.max + ymin = y.min + + for(i <- 0 to x.size - 1) { + + val xr = (((x(i).toDouble - xmin)/(xmax - xmin))*wd + .5).toInt + val yr = (((y(i).toDouble - ymin)/(ymax - ymin))*ht + .5).toInt + plotDot(g, xr, yr) + + } + + if(approxPoints != null) { + + g.setColor(Color.red) + val x = approxPoints.map(T => (T.toArray)(0)) + val y = approxPoints.map(T => (T.toArray)(1)) + + for(i <- 0 to x.size-1) { + val xr = (((x(i).toDouble - xmin)/(xmax - xmin))*wd + .5).toInt + val yr = (((y(i).toDouble - ymin)/(ymax - ymin))*ht + .5).toInt + plotDot(g, xr, yr) + } + + } + + } + + } + +} + +class OutputFrame2D( title: String ) extends Frame( title ) { + + val wd = 800 + val ht = 600 + + var outputCanvas = new OutputCanvas2D( wd, ht ) + + def apply() { + addWindowListener(windowAdapter) + setSize(wd, ht) + add("Center", outputCanvas) + show() + } + + def setData(rdd: RDD[Vector]) { + outputCanvas.setData(rdd) + } + + def setApproxPoints(rdd: RDD[Vector]) { + outputCanvas.setApproxPoints(rdd) + } + + +} + +object windowAdapter3D extends WindowAdapter { + + override def windowClosing(e: WindowEvent) { + System.exit(0) + } + +} + +class OutputCanvas3D(wd: Int, ht: Int, shadowFrac: Double) extends Canvas { + + var points: Array[Vector] = null + var approxPoints: Array[Vector] = null + var angle: Double = 0.0 + + /* 3 dimensional (x,y,z) vector */ + def setData(rdd: RDD[Vector]) { + points = rdd.collect + repaint + } + + def setAngle(angle: 
Double) {
+    this.angle = angle
+    repaint
+  }
+
+
+  def setApproxPoints(rdd: RDD[Vector]) {
+    approxPoints = rdd.collect
+    repaint
+  }
+
+  def plotDot(g: Graphics, x: Int, y: Int) {
+    val r = 5
+    val noSamp = 6*r
+    var x1 = x
+    var y1 = y + r
+    for( j <- 1 to noSamp ) {
+      val x2 = (x.toDouble + math.sin( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt
+      val y2 = (y.toDouble + math.cos( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt
+      g.drawLine(x1, ht - y1, x2, ht - y2)
+      x1 = x2
+      y1 = y2
+    }
+  }
+
+  def plotLine(g: Graphics, x1: Int, y1: Int, x2: Int, y2: Int) {
+    g.drawLine(x1, ht - y1, x2, ht - y2)
+  }
+
+  def calcCord(arr: Array[Double], angle: Double):
+    (Double, Double, Double, Double, Double, Double) = {
+
+    var arrOut = new Array[Double](6)
+
+    val x = arr(0)*math.cos(angle) - arr(1)*math.sin(angle)
+    val y = arr(0)*math.sin(angle) + arr(1)*math.cos(angle)
+    val z = arr(2)
+
+    val x0 = arr(0)*math.cos(angle) - arr(1)*math.sin(angle)
+    val y0 = arr(0)*math.sin(angle) + arr(1)*math.cos(angle)
+    val z0 = 0
+
+    val xs = (arr(0) + shadowFrac*arr(2))*math.cos(angle) - arr(1)*math.sin(angle)
+    val ys = (arr(0) + shadowFrac*arr(2))*math.sin(angle) + arr(1)*math.cos(angle)
+    val zs = 0
+
+    arrOut(0) = y - .5*x
+    arrOut(1) = z - .25*x
+
+    arrOut(2) = y0 - .5*x0
+    arrOut(3) = z0 - .25*x0
+
+    arrOut(4) = ys - .5*xs
+    arrOut(5) = zs - .25*xs
+
+    (arrOut(0), arrOut(1), arrOut(2), arrOut(3), arrOut(4), arrOut(5))
+
+  }
+
+  override def paint(g: Graphics) = {
+
+    if(points!=null) {
+
+      var p = points.map(T => calcCord(T.toArray, angle)).toArray
+
+      var xmax = p(0)._1
+      var xmin = p(0)._1
+      var ymax = p(0)._2
+      var ymin = p(0)._2
+
+      for(i <- 0 to p.size-1) {
+
+        if(xmax<p(i)._1) {
+          xmax = p(i)._1
+        }
+        if(xmax<p(i)._3) {
+          xmax = p(i)._3
+        }
+        if(xmax<p(i)._5) {
+          xmax = p(i)._5
+        }
+        if(xmin>p(i)._1) {
+          xmin = p(i)._1
+        }
+        if(xmin>p(i)._3) {
+          xmin = p(i)._3
+        }
+        if(xmin>p(i)._5) {
+          xmin = p(i)._5
+        }
+
+        if(ymax<p(i)._2) {
+          ymax = p(i)._2
+        }
+        if(ymax<p(i)._4) {
+          ymax = p(i)._4
+        }
+        if(ymax<p(i)._6) {
+          ymax = p(i)._6
+        }
+        if(ymin>p(i)._2) {
+          ymin = p(i)._2
+        }
+        if(ymin>p(i)._4) {
+          ymin = p(i)._4
+        }
+        if(ymin>p(i)._6) {
+          ymin = p(i)._6
+        }
+
+      }
+
+      for(i <- 0 to p.size-1) {
+
+        var x_ = (((p(i)._1 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt
+        var y_ = (((p(i)._2 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt
+        var x0 = (((p(i)._3 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt
+        var y0 = (((p(i)._4 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt
+        var xs = (((p(i)._5 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt
+        var ys = (((p(i)._6 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt
+
+        g.setColor(Color.black)
+        plotDot(g, x_, y_)
+        plotLine(g, x_, y_, x0, y0)
+        g.setColor(Color.gray)
+        plotLine(g, x0, y0, xs, ys)
+
+      }
+
+      if(approxPoints != null) {
+
+        var p = approxPoints.map(T => calcCord(T.toArray, angle))
+
+        for(i <- 0 to p.size-1) {
+
+          var x_ = (((p(i)._1 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt
+          var y_ = (((p(i)._2 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt
+          var x0 = (((p(i)._3 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt
+          var y0 = (((p(i)._4 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt
+          var xs = (((p(i)._5 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt
+          var ys = (((p(i)._6 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt
+
+          g.setColor(Color.red)
+          plotDot(g, x_, y_)
+          plotLine(g, x_, y_, x0, y0)
+          g.setColor(Color.magenta)
+          plotLine(g, x0, y0, xs, ys)
+
+        }
+
+      }
+
+    }
+  }
+}
+
+class OutputFrame3D(title: String, shadowFrac: Double) extends Frame(title) {
+
+  val wd = 800
+  val ht = 600
+
+  def this(title: String) = this(title, .25)
+
+  var outputCanvas = new OutputCanvas3D(wd, ht, shadowFrac)
+
+  def apply() {
+    addWindowListener(windowAdapter3D)
+    setSize(wd, ht)
+    add("Center", outputCanvas)
+    show()
+  }
+ + def setData(rdd: RDD[Vector]) { + outputCanvas.setData(rdd) + } + + def setAngle(angle: Double) { + outputCanvas.setAngle(angle) + } + + def setApproxPoints(rdd: RDD[Vector]) { + outputCanvas.setApproxPoints(rdd) + } + +} + +object ANNDemo { + + var rand = new Random(0) + + def generateInput2D(f: Double => Double, xmin: Double, xmax: Double, noPoints: Int): + Array[(Vector,Vector)] = + { + + var out = new Array[(Vector,Vector)](noPoints) + + for(i <- 0 to noPoints - 1) { + val x = xmin + rand.nextDouble()*(xmax - xmin) + val y = f(x) + out(i) = (Vectors.dense(x), Vectors.dense(y)) + } + + return out + + } + + + def generateInput3D(f: (Double,Double) => Double, xmin: Double, xmax: Double, + ymin: Double, ymax: Double, noPoints: Int): Array[(Vector,Vector)] = { + + var out = new Array[(Vector,Vector)](noPoints) + + for(i <- 0 to noPoints - 1) { + + val x = xmin + rand.nextDouble()*(xmax - xmin) + val y = ymin + rand.nextDouble()*(ymax - ymin) + val z = f(x, y) + + var arr = new Array[Double](2) + + arr(0) = x + arr(1) = y + out(i) = (Vectors.dense(arr), Vectors.dense(z)) + + } + + out + + } + + def generateInput4D(f: Double => (Double,Double,Double), + tmin: Double, tmax: Double, noPoints: Int): Array[(Vector,Vector)] = { + + var out = new Array[(Vector,Vector)](noPoints) + + for(i <- 0 to noPoints - 1) { + + val t: Double = tmin + rand.nextDouble()*(tmax - tmin) + var arr = new Array[Double](3) + var F = f(t) + + arr(0) = F._1 + arr(1) = F._2 + arr(2) = F._3 + + out(i) = (Vectors.dense(t), Vectors.dense(arr)) + } + + out + + } + + def f( T: Double ): Double = { + val y = 0.5 + Math.abs(T/5).toInt.toDouble*.15 + math.sin(T*math.Pi/10)*.1 + assert(y <= 1) + y + } + + def f3D(x: Double, y: Double): Double = { + .5 + .24*Math.sin(x*2*math.Pi/10) + .24*Math.cos(y*2*math.Pi/10) + } + + def f4D(t: Double): (Double, Double,Double) = { + val x = Math.abs(.8*Math.cos(t*2*math.Pi/20)) + .1 + val y = (11 + t)/22 + val z = .5 + .35*Math.sin(t*2*math.Pi/5)*Math.cos( t*2*math.Pi/10 ) + .15*t/11 + (x, y, z) + } + + def concat(v1: Vector, v2: Vector): Vector = { + + var a1 = v1.toArray + var a2 = v2.toArray + var a3 = new Array[Double](a1.size + a2.size) + + for(i <- 0 to a1.size - 1) { + a3(i) = a1(i) + } + + for(i <- 0 to a2.size - 1) { + a3(i + a1.size) = a2(i) + } + + Vectors.dense(a3) + + } + + def main(arg: Array[String]) { + + println("ANN demo") + println + + val formatter = new SimpleDateFormat("hh:mm:ss") + + var curAngle: Double = 0.0 + + var outputFrame2D: OutputFrame2D = null + var outputFrame3D: OutputFrame3D = null + var outputFrame4D: OutputFrame3D = null + + outputFrame2D = new OutputFrame2D("x -> y") + outputFrame2D.apply + + outputFrame3D = new OutputFrame3D("(x,y) -> z", 1) + outputFrame3D.apply + + outputFrame4D = new OutputFrame3D("t -> (x,y,z)") + outputFrame4D.apply + + var A = 20.0 + var B = 50.0 + + var conf = new SparkConf().setAppName("Parallel ANN").setMaster("local[1]") + var sc = new SparkContext(conf) + + val testRDD2D = + sc.parallelize(generateInput2D( T => f(T), -10, 10, 100 ), 2).cache + val testRDD3D = + sc.parallelize(generateInput3D((x,y) => f3D(x,y), -10, 10, -10, 10, 200 ), 2).cache + val testRDD4D = + sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache + + val validationRDD2D = + sc.parallelize(generateInput2D( T => f(T), -10, 10, 100 ), 2).cache + val validationRDD3D = + sc.parallelize(generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache + val validationRDD4D = + sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 
2 ).cache + + outputFrame2D.setData( testRDD2D.map( T => concat( T._1, T._2 ) ) ) + outputFrame3D.setData( testRDD3D.map( T => concat( T._1, T._2 ) ) ) + outputFrame4D.setData( testRDD4D.map( T => T._2 ) ) + + var starttime = Calendar.getInstance().getTime() + println("Training 2D") + var model2D = ArtificialNeuralNetwork.train(testRDD2D, Array[Int](1, 5, 3, 1), 1000, 1e-8) + var stoptime = Calendar.getInstance().getTime() + println(((stoptime.getTime-starttime.getTime + 500) / 1000) + "s") + + starttime = stoptime + println("Training 3D") + var model3D = ArtificialNeuralNetwork.train(testRDD3D, Array[Int](2, 20, 1), 1000, 1e-8) + stoptime = Calendar.getInstance().getTime() + println(((stoptime.getTime-starttime.getTime + 500) / 1000) + "s") + + starttime = stoptime + println("Training 4D") + var model4D = ArtificialNeuralNetwork.train(testRDD4D, Array[Int](1, 20, 3), 1000, 1e-8) + stoptime = Calendar.getInstance().getTime() + println(((stoptime.getTime-starttime.getTime + 500) / 1000) + "s") + + val predictedAndTarget2D = validationRDD2D.map(T => (T._1, T._2, model2D.predictV(T._1))) + val predictedAndTarget3D = validationRDD3D.map(T => (T._1, T._2, model3D.predictV(T._1))) + val predictedAndTarget4D = validationRDD4D.map(T => (T._1, T._2, model4D.predictV(T._1))) + + var err2D = predictedAndTarget2D.map( T => + (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) + ).reduce((u,v) => u + v) + + var err3D = predictedAndTarget3D.map( T => + (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) + ).reduce((u,v) => u + v) + + var err4D = predictedAndTarget4D.map(T => { + + val v1 = T._2.toArray + val v2 = T._3.toArray + + (v1(0) - v2(0)) * (v1(0) - v2(0)) + + (v1(1) - v2(1)) * (v1(1) - v2(1)) + + (v1(2) - v2(2)) * (v1(2) - v2(2)) + + }).reduce((u,v) => u + v) + + println("Error 2D/3D/4D: " + (err2D, err3D, err4D)) + + val predicted2D = predictedAndTarget2D.map( + T => concat(T._1, T._3) + ) + + val predicted3D = predictedAndTarget3D.map( + T => concat(T._1, T._3) + ) + + val predicted4D = predictedAndTarget4D.map( + T => T._3 + ) + + outputFrame2D.setApproxPoints(predicted2D) + outputFrame3D.setApproxPoints(predicted3D) + outputFrame4D.setApproxPoints(predicted4D) + + while(true) { // stops when closing the window + + curAngle = curAngle + math.Pi/4 + if(curAngle >= 2*math.Pi) { + curAngle = curAngle - 2*math.Pi + } + + outputFrame3D.setAngle(curAngle) + outputFrame4D.setAngle(curAngle) + + outputFrame3D.repaint + outputFrame4D.repaint + + Thread.sleep(3000) + + } + + sc.stop + + } + +} From 7c902495c3936018c8363349a68c8eb04aa0ceb6 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Mon, 22 Sep 2014 16:28:56 +0800 Subject: [PATCH 043/143] Update mllib-ann.md Update with back-propagation and LBFGS. --- docs/mllib-ann.md | 243 +++++++++++++++++++++++----------------------- 1 file changed, 123 insertions(+), 120 deletions(-) diff --git a/docs/mllib-ann.md b/docs/mllib-ann.md index 005a5be4987d9..4a1acd5f380da 100644 --- a/docs/mllib-ann.md +++ b/docs/mllib-ann.md @@ -10,167 +10,170 @@ This document describes the MLlib's Artificial Neural Network (ANN) implementati The implementation currently consist of the following files: -* 'ParallelANN.scala': implements the ANN -* 'GeneralizedSteepestDescentAlgorithm.scala': provides an abstract class and model as basis for 'ParallelANN'. 
- -In addition, there is a demo/test available: - -* 'TestParallelANN.scala': tests parallel ANNs for various functions -* 'TestParallelANNgraphics.scala': graphical output for 'TestParallelANN.scala' +* 'ArtificialNeuralNetwork.scala': implements the ANN +* 'ANNSuite': implements automated tests for the ANN and its gradient +* 'ANNDemo': a demo that approximates three functions and shows a graphical representation of +the result # Architecture and Notation -The file ParallelANN.scala implements a three-layer ANN with the following architecture: +The file ArtificialNeuralNetwork.scala implements the ANN. The following picture shows the +architecture of a 3-layer ANN: ``` +-------+ | | - | X_0 | + | N_0,0 | | | - +-------+ +-------+ - | | - +-------+ | H_0 | +-------+ - | | | | | | - | X_1 |- +-------+ ->| O_0 | - | | \ Vij / | | - +-------+ - +-------+ - +-------+ - \ | | / Wjk - : ->| H_1 |- +-------+ - : | | | | - : +-------+ | O_1 | - : | | - : : +-------+ - : : - : : : - : : - : : +-------+ - : : | | - : : | O_K-1 | - : | | - : +-------+ +-------+ - : | | - : | H_J-1 | - | | - +-------+ +-------+ + +-------+ +-------+ + | | + +-------+ | N_0,1 | +-------+ + | | | | | | + | N_1,0 |- +-------+ ->| N_0,2 | + | | \ Wij1 / | | + +-------+ -- +-------+ -- +-------+ + \ | | / Wjk2 + : ->| N_1,1 |- +-------+ + : | | | | + : +-------+ | N_1,2 | + : | | + : : +-------+ + : : + : : : + : : + : : +-------+ + : : | | + : : |N_K-1,2| + : | | + : +-------+ +-------+ + : | | + : |N_J-1,1| + | | + +-------+ +-------+ | | - | X_I-1 | + |N_I-1,0| | | +-------+ - +-------+ +--------+ - | | | | - | -1 | | -1 | - | | | | - +-------+ +--------+ + +-------+ +--------+ + | | | | + | -1 | | -1 | + | | | | + +-------+ +--------+ -INPUT LAYER HIDDEN LAYER OUTPUT LAYER +INPUT LAYER HIDDEN LAYER OUTPUT LAYER ``` -The nodes X_0 to X_{I-1} are the I input nodes. The nodes H_0 to H_{J-1} are the J hidden nodes and the nodes O_0 to O_{K-1} are the K output nodes. Between each input node X_i and hidden node H_j there is a weight V_{ij}. Likewise, between each hidden node H_j and each output node O_k is a weight W_{jk}. - -The ANN also implements two bias units. These are nodes that always output the value -1. The bias units are in the input and in the hidden layer. They act as normal nodes, except that the bias unit in the hidden layer has no input. The bias units can also be denoted by X_I and H_J. +The i-th node in layer l is denoted by N_{i,l}, both i and l starting with 0. The weight +between node i in layer l-1 and node j in layer l is denoted by Wijl. Layer 0 is the input +layer, whereas layer L is the output layer. -The value of a hidden node H_j is calculated as follows: +The ANN also implements bias units. These are nodes that always output the value -1. The bias +units are in all layers except the output layer. They act similar to other nodes, but do not +have input. -`$H_j = g ( \sum_{i=0}^{I} X_i*V_{i,j} )$` +The value of node N_{j,l} is calculated as follows: -Likewise, the value of the output node O_k is calculated as follows: - -`$O_k = g( \sum_{j=0}^{J} H_j*W_{j,k} )$` +`$N_{j,l} = g( \sum_{i=0}^{topology_l} W_{i,j,l)*N_{i,l-1} )$` Where g is the sigmod function `$g(t) = \frac{e^{\beta t} }{1+e^{\beta t}}$` -and `$\beta$` defines the steepness of g. - -# Gradient descent - -Currently, the MLlib uses gradent descent for training. 
This means that the weights V_{ij} and W_{jk} are updated by adding a fraction of the gradient to V_{ij} and W_{jk} of the following function: - -`$E = \sum_{k=0}^{K-1} (O_k - Y_k )^2$` - -where Y_k is the target output given inputs X_0 ... X_{I-1} - -Calculations provide that: +# LBFGS -`$\frac{\partial E}{\partial W_{jk}} = 2 (O_k-Y_k) \cdot H_j \cdot g' \left( \sum_{m=0}^{J} W_{mk} H_m \right)$` +MLlib uses the LBFGS algorithm for training. It minimises the following error function: -and +`$E = \sum_{k=0}^{K-1} (N_{k,L} - Y_k )^2$` -`$\frac{\partial E}{\partial V_{ij}} = 2 \sum_{k=0}^{K-1} \left( (O_k - Y_k) \cdot X_i \cdot W_{jk} \cdot g'\left( \sum_{n=0}^{J} W_{nk} H_n \right) g'\left( \sum_{m=0}^{I} V_{mj} X_i \right) \right)$` - -The training step consists of the two operations - -`$V_{ij} = V_{ij} - \epsilon \frac{\partial E}{\partial V_{ij}}$` - -and - -`$W_{jk} = W_{jk} - \epsilon \frac{\partial E}{\partial W_{jk}}$` - -where `$\epsilon$` is the step size. +where Y_k is the target output given inputs N_{0,0} ... N_{I-1,0}. # Implementation Details -## The 'ParallelANN' class - -The 'ParallelANN' class is the main class of the ANN. This class uses a trait 'ANN', which includes functions for calculating the hidden layer ('computeHidden') and calculation of the output ('computeValues'). The output of 'computeHidden' includes the bias node in the hidden layer, such that it does not need to handle the hidden bias node differently. - -The 'ParallelANN' class has the following constructors: - -* `ParallelANN( stepSize, numIterations, miniBatchFraction, noInput, noHidden, noOutput, beta )` -* `ParallelANN()`: assumes 'stepSize'=1.0, 'numIterations'=100, 'miniBatchFraction'=1.0, 'noInput'=1, 'noHidden'=5, noOutput'=1, 'beta'=1.0. -* `ParallelANN( noHidden )`: as 'ParallelANN()', but allows specification of 'noHidden' -* `ParallelANN( noInput, noHidden )`: as 'ParallelANN()', but allows specification of number of 'noInput' and 'noHidden' -* `ParallelANN( noInput, noHidden, noOutput )`: as 'ParallelANN()', but allows specification of 'noInput', 'noHidden' and 'noOutput' - -The number of input nodes I is stored in the variable 'noInput', the number of hidden nodes J is stored in 'noHidden' and the number of output nodes K is stored in 'noOutput'. 'beta' contains the value of `$\beta$` for the sigmoid function. - -The parameters 'stepSize', 'numIterations' and 'miniBatchFraction' are of use for the Statistical Gradient Descent function. - -In addition, it has a single vector 'weights' corresponding to V_{ij} and W_{jk}. The mapping of V_{ij} and W_{jk} into 'weights' is as follows: - -* V_{ij} -> `weights[ i + j*(noInput+1) ]` -* W_{jk} -> `weights[ (noInput+1)*noHidden + j + k*(noHidden+1) ]` - -The training function carries the name 'train'. It can take various inputs: - -* `def train( rdd: RDD[(Vector,Vector)] )`: starts a complete new training session and generates a new ANN. -* `def train( rdd: RDD[(Vector,Vector)], model: ParallelANNModel )`: continues a training session with an existing ANN. -* `def train( rdd: RDD[(Vector,Vector)], weights: Vector )`: starts a training session using initial weights as indicated by 'weights'. - -The input of the training function is an RDD with (input/output) training pairs, each input and output being stored as a 'Vector'. The training function returns a variable of from class 'ParallelANNModel', as described below. - -## The 'ParallelANNModel' class - -All information needed for the ANN is stored in the 'ParallelANNModel' class. 
The training function 'train' from 'ParallelANN' returns an object from the 'ParallelANNModel' class.
-
-The information in 'parallelANNModel' consist of the weights, the number of input, hidden and output nodes, as well as two functions 'predictPoint' and 'predictPointV'.
-
-The 'predictPoint' function is used to calculate a single output value as a 'Double'. If the output of the ANN actually is a vector, it returns just the first element of the vector, that is O_0. The output of the 'predictPointV' is of type 'Vector', and returns all K output values.
-
-## The 'GeneralizedSteepestDescentAlgorithm' class
-
-The 'GeneralizedSteepestDescendAlgorithm' class is based on the 'GeneralizedLinearAlgorithm' class. The main difference is that the 'GeneralizedSteepestDescentAlgorithm' is based on output values of type 'Vector', whereas 'GeneralizedLinearAlgorithm' is based of output values of type 'Double'. The new class was needed, because an ANN ideally outputs multiple values, hence a 'Vector'.
+## The `ArtificialNeuralNetwork` class
+
+The `ArtificialNeuralNetwork` class has the following constructor:
+
+`class ArtificialNeuralNetwork private(topology: Array[Int], maxNumIterations: Int,
+convergenceTol: Double)`
+
+* `topology` is an array of integers indicating the number of nodes per layer. For example, if
+`topology` holds `(3, 5, 1)`, it means that there are three input nodes, five nodes in a single
+hidden layer and 1 output node.
+* `maxNumIterations` indicates the number of iterations after which the LBFGS algorithm must
+have stopped.
+* `convergenceTol` indicates the acceptable error, and if reached the LBFGS algorithm will
+stop. A lower number of `convergenceTol` will give a higher precision.
+
+There is also an object `ArtificialNeuralNetwork`. This object contains the training function.
+There are six different instances of the training function, each for use with different
+parameters. All take as the first parameter the RDD `input`, which contains pairs of input and
+output vectors.
+
+* `def train(input: RDD[(Vector, Vector)], topology: Array[Int], maxNumIterations: Int):
+ArtificialNeuralNetworkModel`: starts training with random initial weights, and a default
+`convergenceTol`=1e-5.
+* `def train(input: RDD[(Vector, Vector)], topology: Array[Int], initialWeights: Vector,
+maxNumIterations: Int): ArtificialNeuralNetworkModel`: starts training with given initial
+weights, and a default `convergenceTol`=1e-5.
+* `def train(input: RDD[(Vector, Vector)], model: ArtificialNeuralNetworkModel,
+maxNumIterations: Int): ArtificialNeuralNetworkModel`: resumes training given an earlier
+calculated model, and a default `convergenceTol`=1e-5.
+* `def train(input: RDD[(Vector, Vector)], topology: Array[Int], maxNumIterations: Int,
+convergenceTol: Double): ArtificialNeuralNetworkModel`: starts training with random initial
+weights. Allows setting a customised `convergenceTol`.
+* `def train(input: RDD[(Vector, Vector)], topology: Array[Int], initialWeights: Vector,
+maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel`: starts training
+with given initial weights. Allows setting a customised `convergenceTol`.
+* `def train(input: RDD[(Vector, Vector)], model: ArtificialNeuralNetworkModel,
+maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel`: resumes training
+given an earlier calculated model. Allows setting a customised `convergenceTol`.
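+
+For example, assuming an existing SparkContext `sc` and the usual MLlib imports, a minimal
+training and prediction session with the fourth variant could look as follows. This is an
+illustrative sketch only; the data set and parameter values are examples:
+
+```
+// XOR training set: input vectors of size 2, output vectors of size 1.
+val xorData = sc.parallelize(Seq(
+  (Vectors.dense(0.0, 0.0), Vectors.dense(0.0)),
+  (Vectors.dense(0.0, 1.0), Vectors.dense(1.0)),
+  (Vectors.dense(1.0, 0.0), Vectors.dense(1.0)),
+  (Vectors.dense(1.0, 1.0), Vectors.dense(0.0))))
+// Topology: 2 input nodes, 5 hidden nodes, 1 output node.
+val model = ArtificialNeuralNetwork.train(xorData, Array[Int](2, 5, 1), 100, 1e-5)
+val prediction = model.predictV(Vectors.dense(1.0, 0.0)) // should round to 1.0
+```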
+All training functions return the trained ANN using the class `ArtificialNeuralNetworkModel`.
+This class has the following function:
+
+* `predictV(testData: Vector): Vector` calculates the output vector given input vector
+`testData`.
+
+The weights use dby `predictV` come from the model.
 
 ## Training
 
-Science has provided many different strategies to train an ANN. Hence it is important that the optimising functions in MLlib's ANN are interchangeable. The ParallelANN class has a variable 'optimizer', which is currently set to a 'GradientDescent' optimising class. The 'GradientDescent' optimising class implements a stochastic gradient descent method, and is also used for other optimisation technologies in Spark. It is expected that other optimising functions will be defined for Spark, and these can be stored in the 'optimizer' variable.
+We have chosen to implement the ANN with LBFGS as optimiser function. We compared it with
+Stochastic Gradient Descent. LBFGS was much faster, but correspondingly also starts to overfit
+earlier.
+
+Science has provided many different strategies to train an ANN. Hence it is important that the
+optimising functions in MLlib's ANN are interchangeable. A new optimisation strategy can be
+implemented by creating a new class descending from ArtificialNeuralNetwork, and replacing the
+optimiser, updater and possibly gradient as required.
 
-# Demo/test
+# Demo and tests
 
-Usage of MLlib's ANN is demonstrated through the 'TestParallelANN' demo program. The program generates three functions:
+Usage of MLlib's ANN is demonstrated through the 'ANNDemo' demo program. The program generates three functions:
 
 * f2d: x -> y
 * f3d: (x,y) -> z
 * f4d: t -> (x,y,z)
 
-When the program is given the Java argument 'graph', it will show a graphical representation of the target function and the latest values.
+It will calculate an approximation of the target function, and show a graphical representation
+of the training set and the results after applying the testing set.
+
+In addition, there are the following tests:
+
+* "ANN learns XOR function": tests that the ANN can properly approximate an XOR function.
+* "Gradient of ANN": tests that the output of the ANN gradient is roughly equal to an
+approximated gradient.
 
 # Conclusion
 
-The 'ParallelANN' class implements a Artificial Neural Network (ANN), using the stochastic gradient descent method. It takes as input an RDD of input/output values of type 'Vector', and returns an object of type 'ParallelANNModel' containing the parameters of the trained ANN. The 'ParallelANNModel' object can also be used to calculate results after training.
+The 'AritificalNeuralNetwork' class implements a Artificial Neural Network (ANN), using the
+LBFGS algorithm. It takes as input an RDD of input/output values of type 'Vector', and returns
+an object of type 'ArtificialNeuralNetworkModel' containing the parameters of the trained ANN.
+The 'ArtificialNeuralNetworkModel' object can also be used to calculate results after training.
 
-The training of an ANN can be interrupted and later continued, allowing intermediate inspection of the results.
+The training of an ANN can be interrupted and later continued, allowing intermediate inspection
+of the results.
 
-A demo program for ANN is provided.
+A demo program and tests for ANN are provided.
From 87f630b3bc2244620658e53e79a613fd1e54d645 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Mon, 22 Sep 2014 16:30:08 +0800 Subject: [PATCH 044/143] Update mllib-ann.md Fixed sigmoid --- docs/mllib-ann.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/mllib-ann.md b/docs/mllib-ann.md index 4a1acd5f380da..ac42446276255 100644 --- a/docs/mllib-ann.md +++ b/docs/mllib-ann.md @@ -78,7 +78,7 @@ The value of node N_{j,l} is calculated as follows: Where g is the sigmod function -`$g(t) = \frac{e^{\beta t} }{1+e^{\beta t}}$` +`$g(t) = \frac{1}{1+e^{-t}}$` # LBFGS From 986f37a7de75d915ea4d7828b077ee9ff68836dc Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Tue, 23 Sep 2014 10:20:54 +0800 Subject: [PATCH 045/143] Update ArtificialNeuralNetwork.scala Fixed erroneously reverting ANNHelper trait --- .../mllib/ann/ArtificialNeuralNetwork.scala | 168 +++++++----------- 1 file changed, 64 insertions(+), 104 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index fe200f8c1aae2..d99e78363ae2e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -69,62 +69,17 @@ import org.apache.spark.util.random.XORShiftRandom */ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topology: Array[Int]) - extends Serializable { - - private val L = topology.length - 1 - - private val ofsWeight: Array[Int] = { - val tmp = new Array[Int](L + 1) - var curPos = 0 - tmp(0) = 0 - var l = 1 - while(l <= L) { - tmp(l) = curPos - curPos = curPos + (topology(l - 1) + 1) * (topology(l)) - l += 1 - } - tmp - } - - private def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) + extends Serializable with ANNHelper { def computeValues(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { - var arrPrev = new Array[Double](topology(0)) - var i: Int = 0 - var j: Int = 0 - var l: Int = 0 - i = 0 - while(i < topology(0)) { - arrPrev(i) = arrData(i) - i += 1 - } - l = 1 - while(l <= L) { - val arrCur = new Array[Double](topology(l)) - j = 0 - while(j < topology(l)) { - var cum: Double = 0.0 - i = 0 - while( i < topology(l - 1) ) { - cum = cum + - arrPrev(i) * arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + i) - i += 1 - } - cum = cum + - arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + topology(l - 1)) // bias - arrCur(j) = g(cum) - j += 1 - } - arrPrev = arrCur - l += 1 - } - arrPrev + val arrNodes = forwardRun(arrData, arrWeights) + arrNodes.slice(arrNodes.size - topology(L), arrNodes.size) } /** * Predict values for a single data point using the model trained. * - * @param testData Vector representing a single data point + * @param testData array representing a single data point * @return Vector prediction from the trained model * * Returns the complete vector. @@ -141,14 +96,14 @@ class ArtificialNeuralNetwork private( convergenceTol: Double) extends Serializable { - private var gradient: Gradient = new ANNLeastSquaresGradient(topology) - private var updater: Updater = new ANNUpdater() + private val gradient = new ANNLeastSquaresGradient(topology) + private val updater = new ANNUpdater() private var optimizer: Optimizer = new LBFGS(gradient, updater). - setConvergenceTol( convergenceTol ). - setMaxNumIterations(maxNumIterations) + setConvergenceTol(convergenceTol). 
+ setNumIterations(maxNumIterations) private def run(input: RDD[(Vector, Vector)], initialWeights: Vector): - ArtificialNeuralNetworkModel = { + ArtificialNeuralNetworkModel = { val data = input.map(v => (0.0, Vectors.fromBreeze(DenseVector.vertcat( @@ -158,13 +113,10 @@ class ArtificialNeuralNetwork private( val weights = optimizer.optimize(data, initialWeights) new ArtificialNeuralNetworkModel(weights, topology) } - } object ArtificialNeuralNetwork { - var optimizer: Optimizer = null; - def train( input: RDD[(Vector, Vector)], topology: Array[Int], @@ -211,7 +163,7 @@ object ArtificialNeuralNetwork { topology: Array[Int], maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel = { - train(input, topology, randomWeights(topology), maxNumIterations, convergenceTol) + train(input, topology, randomWeights(topology), maxNumIterations, convergenceTol) } def randomWeights(topology: Array[Int]): Vector = { @@ -223,7 +175,7 @@ object ArtificialNeuralNetwork { val noWeights = { var tmp = 0 var i = 1 - while(i < topology.size) { + while (i < topology.size) { tmp = tmp + topology(i) * (topology(i - 1) + 1) i += 1 } @@ -231,95 +183,88 @@ object ArtificialNeuralNetwork { } val initialWeightsArr = new Array[Double](noWeights) - var pos = 0; + var pos = 0 l = 1 - while( l < topology.length) { + while (l < topology.length) { i = 0 - while(i < (topology(l) * (topology(l - 1) + 1))) { + while (i < (topology(l) * (topology(l - 1) + 1))) { initialWeightsArr(pos) = (rand.nextDouble * 4.8 - 2.4) / (topology(l - 1) + 1) - pos += 1; + pos += 1 i += 1 } l += 1 } Vectors.dense(initialWeightsArr) } - } -private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { +private[ann] trait ANNHelper { + protected val topology: Array[Int] + protected def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) + protected val L = topology.length - 1 - private def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) - - private val L = topology.length - 1 - - private val noWeights = { + protected val noWeights = { var tmp = 0 var l = 1 - while(l <= L) { + while (l <= L) { tmp = tmp + topology(l) * (topology(l - 1) + 1) l += 1 } tmp } - val ofsWeight: Array[Int] = { + protected val ofsWeight: Array[Int] = { val tmp = new Array[Int](L + 1) - var curPos = 0; - tmp(0) = 0; + var curPos = 0 + tmp(0) = 0 var l = 1 - while(l <= L) { + while (l <= L) { tmp(l) = curPos - curPos = curPos + (topology(l - 1) + 1) * (topology(l)) + curPos = curPos + (topology(l - 1) + 1) * topology(l) l += 1 } tmp } - val noNodes: Int = { + protected val noNodes: Int = { var tmp: Integer = 0 var l = 0 - while(l < topology.size) { + while (l < topology.size) { tmp = tmp + topology(l) l += 1 } tmp } - val ofsNode: Array[Int] = { + protected val ofsNode: Array[Int] = { val tmp = new Array[Int](L + 1) tmp(0) = 0 var l = 1 - while(l <= L) { + while (l <= L) { tmp(l) = tmp(l - 1) + topology(l - 1) l += 1 } tmp } - override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { - val arrData = data.toArray - val arrWeights = weights.toArray + protected def forwardRun(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { val arrNodes = new Array[Double](noNodes) - var i: Int = 0 var j: Int = 0 var l: Int = 0 - - // forward run - i = 0; - while(i < topology(0)) { + i = 0 + while (i < topology(0)) { arrNodes(i) = arrData(i) i += 1 } l = 1 - while( l <= L ) { + while (l <= L) { j = 0 - while(j < topology(l)) { - var cum: Double = 0.0; + while (j < topology(l)) { + var cum: Double = 0.0 i = 0 - while(i < 
topology(l - 1)) { + while (i < topology(l - 1)) { cum = cum + arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + i) * arrNodes(ofsNode(l - 1) + i) @@ -331,16 +276,31 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { } l += 1 } + arrNodes + } +} + +private class ANNLeastSquaresGradient(val topology: Array[Int]) extends Gradient with ANNHelper { + + override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { + val arrData = data.toArray + val arrWeights = weights.toArray + + var i: Int = 0 + var j: Int = 0 + var l: Int = 0 + // forward run + val arrNodes = forwardRun(arrData, arrWeights) val arrDiff = new Array[Double](topology(L)) j = 0 - while( j < topology(L)) { - arrDiff(j) = (arrNodes(ofsNode(L) + j) - arrData(topology(0) + j)) + while (j < topology(L)) { + arrDiff(j) = arrNodes(ofsNode(L) + j) - arrData(topology(0) + j) j += 1 } - var err: Double = 0; + var err: Double = 0 j = 0 - while(j < topology(L)) { + while (j < topology(L)) { err = err + arrDiff(j) * arrDiff(j) j += 1 } @@ -348,19 +308,19 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { // back propagation val arrDelta = new Array[Double](noNodes) j = 0 - while(j < topology(L)) { + while (j < topology(L)) { arrDelta(ofsNode(L) + j) = arrDiff(j) * arrNodes(ofsNode(L) + j) * (1 - arrNodes(ofsNode(L) + j)) j += 1 } l = L - 1 - while(l > 0) { + while (l > 0) { j = 0 - while(j < topology(l)) { + while (j < topology(l)) { var cum: Double = 0.0 i = 0 - while( i < topology(l + 1)) { + while (i < topology(l + 1)) { cum = cum + arrWeights(ofsWeight(l + 1) + (topology(l) + 1) * i + j) * arrDelta(ofsNode(l + 1) + i) * @@ -375,11 +335,11 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { // gradient val arrGrad = new Array[Double](noWeights) l = 1 - while(l <= L) { + while (l <= L) { j = 0 - while(j < topology(l)) { + while (j < topology(l)) { i = 0 - while(i < topology(l - 1)) { + while (i < topology(l - 1)) { arrGrad(ofsWeight(l) + (topology(l - 1) + 1) * j + i) = arrNodes(ofsNode(l - 1) + i) * arrDelta(ofsNode(l) + j) @@ -401,7 +361,7 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { cumGradient: Vector): Double = { val (grad, err) = compute(data, label, weights) cumGradient.toBreeze += grad.toBreeze - return err + err } } From 8e3e2d5f347805198207a5b7e615e13529b85891 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Tue, 23 Sep 2014 10:22:58 +0800 Subject: [PATCH 046/143] Update ArtificialNeuralNetwork.scala Cosmetic white space change --- .../org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index d99e78363ae2e..151526de60c4d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -103,7 +103,7 @@ class ArtificialNeuralNetwork private( setNumIterations(maxNumIterations) private def run(input: RDD[(Vector, Vector)], initialWeights: Vector): - ArtificialNeuralNetworkModel = { + ArtificialNeuralNetworkModel = { val data = input.map(v => (0.0, Vectors.fromBreeze(DenseVector.vertcat( From d2b80fee7c3c2c067b60277327c3ab16c9ef4461 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Tue, 23 Sep 2014 10:26:36 +0800 Subject: [PATCH 
047/143] Update ArtificialNeuralNetwork.scala

Another cosmetic white space change
---
 .../org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
index 151526de60c4d..8531851620a61 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
@@ -163,7 +163,7 @@ object ArtificialNeuralNetwork {
       topology: Array[Int],
       maxNumIterations: Int,
       convergenceTol: Double): ArtificialNeuralNetworkModel = {
-    train(input, topology, randomWeights(topology), maxNumIterations, convergenceTol)
+    train(input, topology, randomWeights(topology), maxNumIterations, convergenceTol)
   }
 
   def randomWeights(topology: Array[Int]): Vector = {

From 1a1c10b679fe50bfe6e9ed076d5c7352926266c3 Mon Sep 17 00:00:00 2001
From: Bert Greevenbosch
Date: Tue, 23 Sep 2014 10:37:27 +0800
Subject: [PATCH 048/143] Update ArtificialNeuralNetwork.scala

Update comment
---
 .../org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
index 8531851620a61..fe4bc40f26008 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
@@ -79,7 +79,7 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol
   /**
    * Predict values for a single data point using the model trained.
    *
-   * @param testData array representing a single data point
+   * @param testData Vector representing a single data point
   * @return Vector prediction from the trained model
   *
   * Returns the complete vector.

From 2a9554b1bc8802a7c234bf22743c071c08386519 Mon Sep 17 00:00:00 2001
From: Bert Greevenbosch
Date: Thu, 25 Sep 2014 16:29:40 +0800
Subject: [PATCH 049/143] Update mllib-ann.md

---
 docs/mllib-ann.md | 126 +++++++++++++++++++++++++++++++---------------
 1 file changed, 85 insertions(+), 41 deletions(-)

diff --git a/docs/mllib-ann.md b/docs/mllib-ann.md
index ac42446276255..bd91b1439da09 100644
--- a/docs/mllib-ann.md
+++ b/docs/mllib-ann.md
@@ -15,6 +15,35 @@ The implementation currently consist of the following files:
 * 'ANNDemo': a demo that approximates three functions and shows a graphical representation of
 the result
 
+# Summary of usage
+
+The "ArtificialNeuralNetwork" object is used as an interface to the neural network. It is
+called as follows:
+
+```
+val annModel = ArtificialNeuralNetwork.train(rdd, hiddenLayersTopology, maxNumIterations)
+```
+
+where
+
+* `rdd` is an RDD of type (Vector,Vector), the first element containing the input vector and
+the second the associated output vector.
+* `hiddenLayersTopology` is an array of integers (Array[Int]), which contains the number of
+nodes per hidden layer, starting with the layer that takes inputs from the input layer, and
+finishing with the layer that outputs to the output layer. The bias nodes are not counted.
+* `maxNumIterations` is an upper bound to the number of iterations to be performed.
+* `annModel` contains the trained ANN parameters, and can be used to calculate the ANN's
+approximation to arbitrary input values.
+
+The approximations can be calculated as follows:
+
+```
+val v_out = annModel.predict(v_in)
+```
+
+where v_in is either a Vector or an RDD of Vectors, and v_out respectively a Vector or RDD of
+(Vector,Vector) pairs, corresponding to input and output values.
+
+Further details and other calling options will be elaborated upon below.
+
 # Architecture and Notation
 
 The file ArtificialNeuralNetwork.scala implements the ANN. The following picture shows the
@@ -72,70 +101,84 @@ The ANN also implements bias units. These are nodes that always output the value
 units are in all layers except the output layer. They act similar to other nodes, but do not
 have input.
 
+The "hiddenLayersTopology" array is converted into the "topology" array by adding the number of
+input nodes in front, and the number of output nodes at the end.
+
 The value of node N_{j,l} is calculated as follows:
 
 `$N_{j,l} = g( \sum_{i=0}^{topology_{l-1}} W_{i,j,l}*N_{i,l-1} )$`
 
-Where g is the sigmod function
+Where g is the sigmoid function
 
-`$g(t) = \frac{1}{1+e^{-t}}$`
+`$g(t) = \frac{e^{\beta t} }{1+e^{\beta t}}$`
 
 # LBFGS
 
-MLlib uses the LBFGS algorithm for training. It minimises the following error function:
+MLlib's ANN implementation uses the LBFGS optimisation algorithm for training. It minimises the
+following error function:
 
-`$E = \sum_{k=0}^{K-1} (N_{k,L} - Y_k )^2$`
+`$E = \sum_{k=0}^{K-1} (N_{k,L} - Y_k)^2$`
 
 where Y_k is the target output given inputs N_{0,0} ... N_{I-1,0}.
 
 # Implementation Details
 
-## The `ArtificialNeuralNetwork` class
+## The "ArtificialNeuralNetwork" class
 
-The `ArtificialNeuralNetwork` class has the following constructor:
+The "ArtificialNeuralNetwork" class has the following constructor:
 
+```
+class ArtificialNeuralNetwork private(topology: Array[Int], maxNumIterations: Int,
+convergenceTol: Double)
+```
+
 * `topology` is an array of integers indicating the number of nodes per layer. For example, if
-"topology" holds (3, 5, 1), it means that there are three input nodes, five nodes in a single
+"topology" holds (3, 5, 1), it means that there are three input nodes, five nodes in a single
 hidden layer and 1 output node.
 * `maxNumIterations` indicates the number of iterations after which the LBFGS algorithm must
 have stopped.
 * `convergenceTol` indicates the acceptable error, and if reached the LBFGS algorithm will
-stop. A lower number of `convergenceTol` will give a higher precision.
+stop. A lower value of "convergenceTol" will give a higher precision.
+
+## The "ArtificialNeuralNetwork" object
+
+The object "ArtificialNeuralNetwork" is the interface to the "ArtificialNeuralNetwork" class.
+The object contains the training function. There are four different instances of the training
+function, each for use with different parameters. All take as the first parameter the RDD
+"input", which contains pairs of input and output vectors.
+
+* `def train(input: RDD[(Vector, Vector)], hiddenLayersTopology: Array[Int], maxNumIterations:
+Int): ArtificialNeuralNetworkModel`: starts training with random initial weights, and a default
+convergenceTol=1e-4.
+* `def train(input: RDD[(Vector, Vector)], model: ArtificialNeuralNetworkModel,
+maxNumIterations: Int): ArtificialNeuralNetworkModel`: resumes training given an earlier
+calculated model, and a default convergenceTol=1e-4.
+* `def train(input: RDD[(Vector, Vector)], hiddenLayersTopology: Array[Int], maxNumIterations:
+Int, convergenceTol: Double): ArtificialNeuralNetworkModel`: starts training with random
+initial weights. Allows setting a customised "convergenceTol".
+* `def train(input: RDD[(Vector, Vector)], model: ArtificialNeuralNetworkModel,
+maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel`: resumes training
+given an earlier calculated model. Allows setting a customised "convergenceTol".
+
+Notice that the "hiddenLayersTopology" differs from the "topology" array. The
+"hiddenLayersTopology" does not include the number of nodes in the input and output layers. The
+number of nodes in input and output layers is calculated from the first element of the training
+RDD. For example, the "topology" array (3, 5, 7, 1) would have a "hiddenLayersTopology" (5, 7);
+the values 3 and 1 are deduced from the training data. The rationale for having these different
+arrays is that future methods may have a different mapping between input values and input nodes
+or output values and output nodes.
+
+## The "ArtificialNeuralNetworkModel" class
 
-All training functions return the trained ANN using the class `ArtificialNeuralNetworkModel`.
+All training functions return the trained ANN using the class "ArtificialNeuralNetworkModel".
 This class has the following function:
 
-* `predictV(testData: Vector): Vector` calculates the output vector given input vector
-`testData`.
+* `predict(testData: Vector): Vector` calculates the output vector given input vector
+"testData".
+* `predict(testData: RDD[Vector]): RDD[(Vector,Vector)]` returns (input, output) vector pairs,
+using the input vectors in "testData".
 
-The weights use dby `predictV` come from the model.
+The weights used by "predict" come from the model.
 
 ## Training
 
 We have chosen to implement the ANN with LBFGS as optimiser function. We compared it with
 Stochastic Gradient Descent. LBFGS was much faster, but correspondingly also starts to overfit
 earlier.
 
 Science has provided many different strategies to train an ANN. Hence it is important that the
 optimising functions in MLlib's ANN are interchangeable. A new optimisation strategy can be
 implemented by creating a new class descending from ArtificialNeuralNetwork, and replacing the
 optimiser, updater and possibly gradient as required.
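+
+As an illustration of these calls, a minimal session could look as follows. This is a sketch
+only (it assumes an existing SparkContext "sc" and the usual MLlib linalg imports); the data
+set and parameter values are examples:
+
+```
+// XOR training set: input vectors of size 2, output vectors of size 1.
+val xorData = sc.parallelize(Seq(
+  (Vectors.dense(0.0, 0.0), Vectors.dense(0.0)),
+  (Vectors.dense(0.0, 1.0), Vectors.dense(1.0)),
+  (Vectors.dense(1.0, 0.0), Vectors.dense(1.0)),
+  (Vectors.dense(1.0, 1.0), Vectors.dense(0.0))))
+// One hidden layer with 5 nodes; the input (2) and output (1) sizes come from xorData.
+var model = ArtificialNeuralNetwork.train(xorData, Array[Int](5), 100, 1e-5)
+// Resume training from the intermediate model for at most another 100 iterations.
+model = ArtificialNeuralNetwork.train(xorData, model, 100, 1e-5)
+// Prediction for a single vector, and for an RDD of vectors.
+val out = model.predict(Vectors.dense(1.0, 0.0))
+val outRDD = model.predict(xorData.map(_._1))
+```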
 
 # Demo and tests
 
-Usage of MLlib's ANN is demonstrated through the 'ANNDemo' demo program. The program generates three functions:
+Usage of MLlib's ANN is demonstrated through the "ANNDemo" demo program. The program generates
+three functions:
 
 * f2d: x -> y
 * f3d: (x,y) -> z
 * f4d: t -> (x,y,z)
 
-It will calculate an approximation of the target function, and show a graphical representation
-of the training set and the results after applying the testing set.
+It will calculate approximations of the target functions, and show a graphical representation
+of the training set and the results after applying the testing set.
 
-In addition, there are the following tests:
+In addition, there are the following automated tests:
 
 * "ANN learns XOR function": tests that the ANN can properly approximate an XOR function.
 * "Gradient of ANN": tests that the output of the ANN gradient is roughly equal to an
 approximated gradient.
 
 # Conclusion
 
-The 'AritificalNeuralNetwork' class implements a Artificial Neural Network (ANN), using the
-LBFGS algorithm. It takes as input an RDD of input/output values of type 'Vector', and returns
-an object of type 'ArtificialNeuralNetworkModel' containing the parameters of the trained ANN.
-The 'ArtificialNeuralNetworkModel' object can also be used to calculate results after training.
+The "ArtificialNeuralNetwork" class implements an Artificial Neural Network (ANN), using the
+LBFGS algorithm. It takes as input an RDD of input/output values of type "(Vector,Vector)", and
+returns an object of type "ArtificialNeuralNetworkModel" containing the parameters of the
+trained ANN. The "ArtificialNeuralNetworkModel" object can also be used to calculate results
+after training.
 
 The training of an ANN can be interrupted and later continued, allowing intermediate inspection
 of the results.
 
 A demo program and tests for ANN are provided.

From 40197efc15af5f602157640a2cd7a273beebcff1 Mon Sep 17 00:00:00 2001
From: Bert Greevenbosch
Date: Thu, 25 Sep 2014 16:31:17 +0800
Subject: [PATCH 050/143] Update ANNDemo.scala

Updated to fit "hiddenLayersTopology"
---
 .../org/apache/spark/examples/ANNDemo.scala | 20 ++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/examples/src/main/scala/org/apache/spark/examples/ANNDemo.scala b/examples/src/main/scala/org/apache/spark/examples/ANNDemo.scala
index 14e9a2d61cb10..dd981f90e9cff 100644
--- a/examples/src/main/scala/org/apache/spark/examples/ANNDemo.scala
+++ b/examples/src/main/scala/org/apache/spark/examples/ANNDemo.scala
@@ -21,11 +21,13 @@ import java.awt._
 import java.awt.event._
 import java.text.SimpleDateFormat
 import java.util.Calendar
+
 import org.apache.spark._
 import org.apache.spark.mllib.ann._
 import org.apache.spark.mllib.linalg._
 import org.apache.spark.mllib.regression._
 import org.apache.spark.rdd.RDD
+
 import scala.Array.canBuildFrom
 import scala.util.Random
 
@@ -367,7 +369,7 @@ object ANNDemo {
 
   }
 
-  def generateInput3D(f: (Double,Double) => Double, xmin: Double, xmax: Double,
+  def generateInput3D(f: (Double,Double) => Double, xmin: Double, xmax: Double,
     ymin: Double, ymax: Double, noPoints: Int): Array[(Vector,Vector)] = {
 
@@ -484,7 +486,7 @@ object ANNDemo {
 
     val validationRDD2D =
       sc.parallelize(generateInput2D( T => f(T), -10, 10, 100 ), 2).cache
-    val validationRDD3D =
+    val validationRDD3D =
       sc.parallelize(generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache
     val validationRDD4D =
       sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache
@@ -495,25 +497,25 @@ object ANNDemo {
 
     var starttime = Calendar.getInstance().getTime()
     println("Training 2D")
-    var model2D = ArtificialNeuralNetwork.train(testRDD2D, Array[Int](1, 5, 3, 1), 1000, 1e-8)
+    var model2D = ArtificialNeuralNetwork.train(testRDD2D, Array[Int](5, 3), 1000, 1e-8)
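+    // Array[Int](5, 3) now gives only the hidden layer sizes; the input (1) and
+    // output (1) layer sizes are deduced from the first element of testRDD2D.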
var stoptime = Calendar.getInstance().getTime() println(((stoptime.getTime-starttime.getTime + 500) / 1000) + "s") starttime = stoptime println("Training 3D") - var model3D = ArtificialNeuralNetwork.train(testRDD3D, Array[Int](2, 20, 1), 1000, 1e-8) + var model3D = ArtificialNeuralNetwork.train(testRDD3D, Array[Int](20), 1000, 1e-8) stoptime = Calendar.getInstance().getTime() println(((stoptime.getTime-starttime.getTime + 500) / 1000) + "s") starttime = stoptime println("Training 4D") - var model4D = ArtificialNeuralNetwork.train(testRDD4D, Array[Int](1, 20, 3), 1000, 1e-8) + var model4D = ArtificialNeuralNetwork.train(testRDD4D, Array[Int](20), 1000, 1e-8) stoptime = Calendar.getInstance().getTime() println(((stoptime.getTime-starttime.getTime + 500) / 1000) + "s") - val predictedAndTarget2D = validationRDD2D.map(T => (T._1, T._2, model2D.predictV(T._1))) - val predictedAndTarget3D = validationRDD3D.map(T => (T._1, T._2, model3D.predictV(T._1))) - val predictedAndTarget4D = validationRDD4D.map(T => (T._1, T._2, model4D.predictV(T._1))) + val predictedAndTarget2D = validationRDD2D.map(T => (T._1, T._2, model2D.predict(T._1))) + val predictedAndTarget3D = validationRDD3D.map(T => (T._1, T._2, model3D.predict(T._1))) + val predictedAndTarget4D = validationRDD4D.map(T => (T._1, T._2, model4D.predict(T._1))) var err2D = predictedAndTarget2D.map( T => (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) @@ -554,7 +556,7 @@ object ANNDemo { while(true) { // stops when closing the window - curAngle = curAngle + math.Pi/4 + curAngle = curAngle + math.Pi/8 if(curAngle >= 2*math.Pi) { curAngle = curAngle - 2*math.Pi } From 589205faf88c2d693c81a5dee891aa41b3538af9 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 25 Sep 2014 16:32:33 +0800 Subject: [PATCH 051/143] Update ArtificialNeuralNetwork.scala Updated with hiddenLayersTopology and added scaladoc API info. --- .../mllib/ann/ArtificialNeuralNetwork.scala | 178 +++++++++++++----- 1 file changed, 135 insertions(+), 43 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index fe4bc40f26008..95e7e87e984d3 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -18,6 +18,7 @@ package org.apache.spark.mllib.ann import breeze.linalg.{DenseVector, Vector => BV, axpy => brzAxpy} + import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.mllib.optimization._ import org.apache.spark.rdd.RDD @@ -33,15 +34,20 @@ import org.apache.spark.util.random.XORShiftRandom * * NOTE: output values should be in the range [0,1] * - * For a network of L layers: + * For a network of H hidden layers: + * + * hiddenLayersTopology(h) indicates the number of nodes in hidden layer h, excluding the bias + * node. h counts from 0 (first hidden layer, taking inputs from input layer) to H - 1 (last + * hidden layer, sending outputs to the output layer). * - * topology( l ) indicates the number of nodes in layer l, excluding the bias node. + * hiddenLayersTopology is converted internally to topology, which adds the number of nodes + * in the input and output layers. 
* * noInput = topology(0), the number of input nodes * noOutput = topology(L-1), the number of output nodes * * input = data( 0 to noInput-1 ) - * output = data( noInput to noInput+noOutput-1 ) + * output = data( noInput to noInput + noOutput - 1 ) * * W_ijl is the weight from node i in layer l-1 to node j in layer l * W_ijl goes to position ofsWeight(l) + j*(topology(l-1)+1) + i in the weights vector @@ -68,29 +74,57 @@ import org.apache.spark.util.random.XORShiftRandom * */ +/** + * Contains the parameters of an Artificial Neural Network (ANN) + * + * @param weights The weights between the neurons in the ANN. + * + * @param topology Array containing the number of nodes per layer in the network, including + * the nodes in the input and output layer, but excluding the bias nodes. + */ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topology: Array[Int]) extends Serializable with ANNHelper { - def computeValues(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { - val arrNodes = forwardRun(arrData, arrWeights) - arrNodes.slice(arrNodes.size - topology(L), arrNodes.size) + /** + * Predicts values for a single data point using the trained model. + * + * @param testData Represents a single data point. + * + * @return Prediction using the trained model. + */ + def predict(testData: Vector): Vector = { + Vectors.dense(computeValues(testData.toArray, weights.toArray)) } /** - * Predict values for a single data point using the model trained. + * Predict values for an RDD of data points using the trained model. * - * @param testData Vector representing a single data point - * @return Vector prediction from the trained model + * @param testDataRDD RDD representing the input vectors. * - * Returns the complete vector. + * @return RDD with predictions using the trained model as (input, output) pairs. */ - def predictV(testData: Vector): Vector = { - Vectors.dense(computeValues(testData.toArray, weights.toArray)) + def predict(testDataRDD: RDD[Vector]): RDD[(Vector,Vector)] = { + testDataRDD.map(T => (T, predict(T)) ) + } + + private def computeValues(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { + val arrNodes = forwardRun(arrData, arrWeights) + arrNodes.slice(arrNodes.size - topology(L), arrNodes.size) } } -class ArtificialNeuralNetwork private( +/** + * Performs the training of an Artificial Neural Network (ANN) + * + * @param topology A vector containing the number of nodes per layer in the network, including + * the nodes in the input and output layer, but excluding the bias nodes. + * + * @param maxNumItereations The maximum number of iterations for the training phase. + * + * @param convergenceTol Convergence tolerance for LBFGS. Smaller value for closer convergence. + */ +class ArtificialNeuralNetwork private[mllib]( topology: Array[Int], maxNumIterations: Int, convergenceTol: Double) @@ -98,13 +132,24 @@ class ArtificialNeuralNetwork private( private val gradient = new ANNLeastSquaresGradient(topology) private val updater = new ANNUpdater() - private var optimizer: Optimizer = new LBFGS(gradient, updater). + private val optimizer = new LBFGS(gradient, updater). setConvergenceTol(convergenceTol). setNumIterations(maxNumIterations) - private def run(input: RDD[(Vector, Vector)], initialWeights: Vector): + /** + * Trains the ANN. + * + * @param trainingRDD RDD containing (input, output) pairs for training. + * + * @param initialWeights: the initial weights of the ANN + * + * @return Trained ANN as ArtificialNeuralNetworkModel. 
+   *
+   * The convergence tolerance and the maximum number of iterations are taken from the class
+   * constructor.
+   */
+  private def run(trainingRDD: RDD[(Vector, Vector)], initialWeights: Vector):
    ArtificialNeuralNetworkModel = {
-    val data = input.map(v =>
+    val data = trainingRDD.map(v =>
      (0.0,
        Vectors.fromBreeze(DenseVector.vertcat(
          v._1.toBreeze.toDenseVector,
@@ -115,58 +160,105 @@ class ArtificialNeuralNetwork private(
    }
}
 
+/**
+ * Interface to the Artificial Neural Network (ANN)
+ */
 object ArtificialNeuralNetwork {
 
+  private val defaultTolerance: Double = 1e-4
+
+  /**
+   * Trains an ANN.
+   *
+   * @param trainingRDD RDD containing (input, output) pairs for training.
+   *
+   * @param hiddenLayersTopology Number of nodes per hidden layer, excluding the bias nodes.
+   *
+   * @param maxNumIterations Specifies maximum number of training iterations.
+   *
+   * @return Trained ANN as ArtificialNeuralNetworkModel.
+   *
+   * Uses default convergence tolerance 1e-4 for LBFGS.
+   */
   def train(
-      input: RDD[(Vector, Vector)],
-      topology: Array[Int],
-      initialWeights: Vector,
+      trainingRDD: RDD[(Vector, Vector)],
+      hiddenLayersTopology: Array[Int],
       maxNumIterations: Int): ArtificialNeuralNetworkModel = {
-    new ArtificialNeuralNetwork(topology, maxNumIterations, 1e-4)
-      .run(input, initialWeights)
+    train( trainingRDD, hiddenLayersTopology, maxNumIterations, defaultTolerance)
   }
 
+  /**
+   * Continues training of an ANN.
+   *
+   * @param trainingRDD RDD containing (input, output) pairs for training.
+   *
+   * @param model Model of an already partly trained ANN.
+   *
+   * @param maxNumIterations Maximum number of training iterations.
+   *
+   * @return Trained ANN as ArtificialNeuralNetworkModel.
+   *
+   * Uses default convergence tolerance 1e-4 for LBFGS.
+   */
   def train(
       input: RDD[(Vector,Vector)],
       model: ArtificialNeuralNetworkModel,
       maxNumIterations: Int): ArtificialNeuralNetworkModel = {
-    train(input, model.topology, model.weights, maxNumIterations)
-  }
-
-  def train(
-      input: RDD[(Vector, Vector)],
-      topology: Array[Int],
-      maxNumIterations: Int): ArtificialNeuralNetworkModel = {
-    train(input, topology, randomWeights(topology), maxNumIterations)
-  }
-
-  def train(
-      input: RDD[(Vector, Vector)],
-      topology: Array[Int],
-      initialWeights: Vector,
-      maxNumIterations: Int,
-      convergenceTol: Double): ArtificialNeuralNetworkModel = {
-    new ArtificialNeuralNetwork(topology, maxNumIterations, convergenceTol)
-      .run(input, initialWeights)
+    train(input, model, maxNumIterations, defaultTolerance)
   }
 
+  /**
+   * Continues training of an ANN using customized convergence tolerance.
+   *
+   * @param trainingRDD RDD containing (input, output) pairs for training.
+   *
+   * @param model Model of an already partly trained ANN.
+   *
+   * @param maxNumIterations Maximum number of training iterations.
+   *
+   * @param convergenceTol Convergence tolerance for LBFGS. Smaller value for closer convergence.
+   *
+   * @return Trained ANN as ArtificialNeuralNetworkModel.
+   */
   def train(
      input: RDD[(Vector,Vector)],
     model: ArtificialNeuralNetworkModel,
     maxNumIterations: Int,
     convergenceTol: Double): ArtificialNeuralNetworkModel = {
-    train(input, model.topology, model.weights, maxNumIterations, convergenceTol)
+    new ArtificialNeuralNetwork( model.topology, maxNumIterations, convergenceTol ).
+      run(input, model.weights)
   }
 
+  /**
+   * Trains an ANN using customized convergence tolerance.
+   *
+   * @param trainingRDD RDD containing (input, output) pairs for training.
+   *
+   * @param hiddenLayersTopology Number of nodes per hidden layer, excluding the bias nodes.
+ * + * @param maxNumIterations Maximum number of training iterations. + * + * @param convergenceTol Convergence tolerance for LBFGS. Smaller value for closer convergence. + * + * @return Trained ANN as ArtificialNeuralNetworkModel. + */ def train( input: RDD[(Vector, Vector)], - topology: Array[Int], + hiddenLayersTopology: Array[Int], maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel = { - train(input, topology, randomWeights(topology), maxNumIterations, convergenceTol) + val topology = convertTopology(input, hiddenLayersTopology) + new ArtificialNeuralNetwork(topology, maxNumIterations, convergenceTol). + run(input, randomWeights(topology)) + } + + private def convertTopology( input: RDD[(Vector,Vector)], + hiddenLayersTopology: Array[Int] ): Array[Int] = { + val firstElt = input.first + firstElt._1.size +: hiddenLayersTopology :+ firstElt._2.size } - def randomWeights(topology: Array[Int]): Vector = { + private def randomWeights(topology: Array[Int]): Vector = { val rand = new XORShiftRandom() var i: Int = 0 From 6390947e81a6817b4fce436f2d1a13f843f5eebc Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 25 Sep 2014 16:34:10 +0800 Subject: [PATCH 052/143] Update ANNSuite.scala Updated to fit "hiddenLayersTopology" --- .../org/apache/spark/mllib/ann/ANNSuite.scala | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala index 76d0e3e9da7be..86894b5df4e6b 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.mllib.ann import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.util.LocalSparkContext import org.apache.spark.util.random.XORShiftRandom + import org.scalatest.FunSuite class ANNSuite extends FunSuite with LocalSparkContext { @@ -32,16 +33,14 @@ class ANNSuite extends FunSuite with LocalSparkContext { Array[Double](1,1) ) val outputs = Array[Double](0, 1, 1, 0) - val inputSize = 2 val hiddenSize = 5 - val outputSize = 1 val data = inputs.zip(outputs).map { case(features, label) => (Vectors.dense(features), Vectors.dense(Array(label)))} val rddData = sc.parallelize(data, 2) - val topology = Array[Int](inputSize, hiddenSize, outputSize) - val model = ArtificialNeuralNetwork.train(rddData, topology, 100, 1e-5) + val hiddenLayersTopology = Array[Int](hiddenSize) + val model = ArtificialNeuralNetwork.train(rddData, hiddenLayersTopology, 100, 1e-5) val predictionAndLabels = rddData.map { case(input, label) => - (model.predictV(input)(0), label(0)) }.collect() + (model.predict(input)(0), label(0)) }.collect() assert(predictionAndLabels.forall { case(p, l) => (math.round(p) - l) == 0 }) } @@ -115,18 +114,20 @@ class ANNSuite extends FunSuite with LocalSparkContext { arrTmpWeights(w) = arrTmpWeights(w) + eps val annModel1 = new ArtificialNeuralNetworkModel(weights, topology) - val brzO1 = annModel1.predictV(data).toBreeze + val brzO1 = annModel1.predict(data).toBreeze val annModel2 = new ArtificialNeuralNetworkModel(tmpWeights, topology) - val brzO2 = annModel2.predictV(data).toBreeze + val brzO2 = annModel2.predict(data).toBreeze val E1 = .5* (brzO1 - brzOut).dot(brzO1 - brzOut) val E2 = .5* (brzO2 - brzOut).dot(brzO2 - brzOut) val dEdW = ( E2 - E1 ) / eps val gradw = gradient(w) - val err = math.abs(dEdW - gradw) - assert(err < accept, s"Difference 
between calculated and approximated gradient too large (approximated $dEdW, calculated $gradw, difference $err)" ) + val err = dEdW - gradw + assert(math.abs(err) < accept, + s"Difference between calculated and approximated gradient too large ($dEdW - $gradw = $err)" + ) arrTmpWeights(w) = arrTmpWeights(w) - eps From abfb0f5d4050cd85dcdfe29e3c0e6bd833104487 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 25 Sep 2014 16:37:23 +0800 Subject: [PATCH 053/143] Update ArtificialNeuralNetwork.scala --- .../org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index 95e7e87e984d3..373e4a3e38ad0 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -141,7 +141,7 @@ class ArtificialNeuralNetwork private[mllib]( * * @param trainingRDD RDD containing (input, output) pairs for training. * - * @param initialWeights: the initial weights of the ANN + * @param initialWeights The initial weights of the ANN * * @return Trained ANN as ArtificialNeuralNetworkModel. * From 039df76b9dc860a6a04e017b123b80b19dbef553 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 25 Sep 2014 16:42:59 +0800 Subject: [PATCH 054/143] Update ArtificialNeuralNetwork.scala --- .../spark/mllib/ann/ArtificialNeuralNetwork.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index 373e4a3e38ad0..77ff9b3bc9b3e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -201,10 +201,10 @@ object ArtificialNeuralNetwork { * Uses default convergence tolerance 1e-4 for LBFGS. */ def train( - input: RDD[(Vector,Vector)], + trainingRDD: RDD[(Vector,Vector)], model: ArtificialNeuralNetworkModel, maxNumIterations: Int): ArtificialNeuralNetworkModel = { - train(input, model, maxNumIterations, defaultTolerance) + train(trainingRDD, model, maxNumIterations, defaultTolerance) } /** @@ -221,12 +221,12 @@ object ArtificialNeuralNetwork { * @return Trained ANN as ArtificialNeuralNetworkModel. */ def train( - input: RDD[(Vector,Vector)], + trainingRDD: RDD[(Vector,Vector)], model: ArtificialNeuralNetworkModel, maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel = { new ArtificialNeuralNetwork( model.topology, maxNumIterations, convergenceTol ). - run(input, model.weights) + run(trainingRDD, model.weights) } /** @@ -243,13 +243,13 @@ object ArtificialNeuralNetwork { * @return Trained ANN as ArtificialNeuralNetworkModel. */ def train( - input: RDD[(Vector, Vector)], + trainingRDD: RDD[(Vector, Vector)], hiddenLayersTopology: Array[Int], maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel = { val topology = convertTopology(input, hiddenLayersTopology) new ArtificialNeuralNetwork(topology, maxNumIterations, convergenceTol). 
- run(input, randomWeights(topology)) + run(trainingRDD, randomWeights(topology)) } private def convertTopology( input: RDD[(Vector,Vector)], From aff66aeb51fae0026518c164cf18987bad1f7bb5 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 25 Sep 2014 16:50:10 +0800 Subject: [PATCH 055/143] Update ArtificialNeuralNetwork.scala --- .../org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index 77ff9b3bc9b3e..ca1df249175fe 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -247,7 +247,7 @@ object ArtificialNeuralNetwork { hiddenLayersTopology: Array[Int], maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel = { - val topology = convertTopology(input, hiddenLayersTopology) + val topology = convertTopology(trainingRDD, hiddenLayersTopology) new ArtificialNeuralNetwork(topology, maxNumIterations, convergenceTol). run(trainingRDD, randomWeights(topology)) } From e78dcd6c8665d846f319c6f448870d3f91cec74a Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Fri, 26 Sep 2014 15:55:14 +0400 Subject: [PATCH 056/143] Minor style fixes --- .../mllib/ann/ArtificialNeuralNetwork.scala | 95 ++++++------------- .../org/apache/spark/mllib/ann/ANNSuite.scala | 25 +---- 2 files changed, 32 insertions(+), 88 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index ca1df249175fe..2d6110e2b1fc7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -75,11 +75,10 @@ import org.apache.spark.util.random.XORShiftRandom */ /** - * Contains the parameters of an Artificial Neural Network (ANN) + * Artificial neural network (ANN) model * - * @param weights The weights between the neurons in the ANN. - * - * @param topology Array containing the number of nodes per layer in the network, including + * @param weights the weights between the neurons in the ANN. + * @param topology array containing the number of nodes per layer in the network, including * the nodes in the input and output layer, but excluding the bias nodes. */ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topology: Array[Int]) @@ -88,9 +87,8 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol /** * Predicts values for a single data point using the trained model. * - * @param testData Represents a single data point. - * - * @return Prediction using the trained model. + * @param testData represents a single data point. + * @return prediction using the trained model. */ def predict(testData: Vector): Vector = { Vectors.dense(computeValues(testData.toArray, weights.toArray)) @@ -100,7 +98,6 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol * Predict values for an RDD of data points using the trained model. * * @param testDataRDD RDD representing the input vectors. - * * @return RDD with predictions using the trained model as (input, output) pairs. 
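+ *
+ * A minimal usage sketch (`model` and `inputs` are illustrative names):
+ * {{{
+ * val predictions: RDD[(Vector, Vector)] = model.predict(inputs)
+ * }}}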
*/ def predict(testDataRDD: RDD[Vector]): RDD[(Vector,Vector)] = { @@ -111,7 +108,6 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol val arrNodes = forwardRun(arrData, arrWeights) arrNodes.slice(arrNodes.size - topology(L), arrNodes.size) } - } /** @@ -119,9 +115,7 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol * * @param topology A vector containing the number of nodes per layer in the network, including * the nodes in the input and output layer, but excluding the bias nodes. - * - * @param maxNumItereations The maximum number of iterations for the training phase. - * + * @param maxNumIterations The maximum number of iterations for the training phase. * @param convergenceTol Convergence tolerance for LBFGS. Smaller value for closer convergence. */ class ArtificialNeuralNetwork private[mllib]( @@ -134,18 +128,15 @@ class ArtificialNeuralNetwork private[mllib]( private val updater = new ANNUpdater() private val optimizer = new LBFGS(gradient, updater). setConvergenceTol(convergenceTol). - setNumIterations(maxNumIterations) + setMaxNumIterations(maxNumIterations) /** - * Trains the ANN. + * Trains the ANN model. + * Uses default convergence tolerance 1e-4 for LBFGS. * * @param trainingRDD RDD containing (input, output) pairs for training. - * - * @param initialWeights The initial weights of the ANN - * - * @return Trained ANN as ArtificialNeuralNetworkModel. - * - * Uses default convergence tolerance 1e-4 for LBFGS. + * @param initialWeights the initial weights of the ANN + * @return ANN model. */ private def run(trainingRDD: RDD[(Vector, Vector)], initialWeights: Vector): ArtificialNeuralNetworkModel = { @@ -161,7 +152,7 @@ class ArtificialNeuralNetwork private[mllib]( } /** - * Interface to the Artificial Neural Network (ANN) + * Top level methods for training the artificial neural network (ANN) */ object ArtificialNeuralNetwork { @@ -169,16 +160,12 @@ object ArtificialNeuralNetwork { /** * Trains an ANN. + * Uses default convergence tolerance 1e-4 for LBFGS. * * @param trainingRDD RDD containing (input, output) pairs for training. - * - * @param hiddenLayersTopology Number of nodes per hidden layer, excluding the bias nodes. - * - * @param maxNumIterations Specifies maximum number of training iterations. - * - * @return Trained ANN as ArtificialNeuralNetworkModel. - * - * Uses default convergence tolerance 1e-4 for LBFGS. + * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes. + * @param maxNumIterations specifies maximum number of training iterations. + * @return ANN model. */ def train( trainingRDD: RDD[(Vector, Vector)], @@ -189,16 +176,12 @@ object ArtificialNeuralNetwork { /** * Continues training of an ANN. + * Uses default convergence tolerance 1e-4 for LBFGS. * * @param trainingRDD RDD containing (input, output) pairs for training. - * - * @param model Model of an already partly trained ANN. - * - * @param maxNumIterations Int Maximum number of training iterations. - * - * @return Trained ANN as ArtificialNeuralNetworkModel. - * - * Uses default convergence tolerance 1e-4 for LBFGS. + * @param model model of an already partly trained ANN. + * @param maxNumIterations maximum number of training iterations. + * @return ANN model. */ def train( trainingRDD: RDD[(Vector,Vector)], @@ -211,14 +194,10 @@ object ArtificialNeuralNetwork { * Trains an ANN using customized convergence tolerance. * * @param trainingRDD RDD containing (input, output) pairs for training. 
- * - * @param hiddenLayersTopology Number of nodes per hidden layer, excluding the bias nodes. - * - * @param maxNumIterations Maximum number of training iterations. - * - * @param convergenceTol Convergence tolerance for LBFGS. Smaller value for closer convergence. - * - * @return Trained ANN as ArtificialNeuralNetworkModel. + * @param model model of an already partly trained ANN. + * @param maxNumIterations maximum number of training iterations. + * @param convergenceTol convergence tolerance for LBFGS. Smaller value for closer convergence. + * @return ANN model. */ def train( trainingRDD: RDD[(Vector,Vector)], @@ -233,14 +212,10 @@ object ArtificialNeuralNetwork { * Continues training of an ANN using customized convergence tolerance. * * @param trainingRDD RDD containing (input, output) pairs for training. - * - * @param model Model of an already partly trained ANN. - * - * @param maxNumIterations Maximum number of training iterations. - * - * @param convergenceTol Convergence tolerance for LBFGS. Smaller value for closer convergence. - * - * @return Trained ANN as ArtificialNeuralNetworkModel. + * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes. + * @param maxNumIterations maximum number of training iterations. + * @param convergenceTol convergence tolerance for LBFGS. Smaller value for closer convergence. + * @return ANN model. */ def train( trainingRDD: RDD[(Vector, Vector)], @@ -260,10 +235,8 @@ object ArtificialNeuralNetwork { private def randomWeights(topology: Array[Int]): Vector = { val rand = new XORShiftRandom() - var i: Int = 0 var l: Int = 0 - val noWeights = { var tmp = 0 var i = 1 @@ -273,10 +246,8 @@ object ArtificialNeuralNetwork { } tmp } - val initialWeightsArr = new Array[Double](noWeights) var pos = 0 - l = 1 while (l < topology.length) { i = 0 @@ -291,11 +262,13 @@ object ArtificialNeuralNetwork { } } +/** + * Helper methods for ANN + */ private[ann] trait ANNHelper { protected val topology: Array[Int] protected def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) protected val L = topology.length - 1 - protected val noWeights = { var tmp = 0 var l = 1 @@ -305,7 +278,6 @@ private[ann] trait ANNHelper { } tmp } - protected val ofsWeight: Array[Int] = { val tmp = new Array[Int](L + 1) var curPos = 0 @@ -318,7 +290,6 @@ private[ann] trait ANNHelper { } tmp } - protected val noNodes: Int = { var tmp: Integer = 0 var l = 0 @@ -328,7 +299,6 @@ private[ann] trait ANNHelper { } tmp } - protected val ofsNode: Array[Int] = { val tmp = new Array[Int](L + 1) tmp(0) = 0 @@ -377,7 +347,6 @@ private class ANNLeastSquaresGradient(val topology: Array[Int]) extends Gradient override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { val arrData = data.toArray val arrWeights = weights.toArray - var i: Int = 0 var j: Int = 0 var l: Int = 0 @@ -389,7 +358,6 @@ private class ANNLeastSquaresGradient(val topology: Array[Int]) extends Gradient arrDiff(j) = arrNodes(ofsNode(L) + j) - arrData(topology(0) + j) j += 1 } - var err: Double = 0 j = 0 while (j < topology(L)) { @@ -470,5 +438,4 @@ private class ANNUpdater extends Updater { brzAxpy(-thisIterStepSize, gradient.toBreeze, brzWeights) (Vectors.fromBreeze(brzWeights), 0) } - } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala index 86894b5df4e6b..35743a2fc6119 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala +++ 
b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala @@ -38,105 +38,82 @@ class ANNSuite extends FunSuite with LocalSparkContext { (Vectors.dense(features), Vectors.dense(Array(label)))} val rddData = sc.parallelize(data, 2) val hiddenLayersTopology = Array[Int](hiddenSize) - val model = ArtificialNeuralNetwork.train(rddData, hiddenLayersTopology, 100, 1e-5) + val model = ArtificialNeuralNetwork.train(rddData, hiddenLayersTopology, 500, 1e-5) val predictionAndLabels = rddData.map { case(input, label) => (model.predict(input)(0), label(0)) }.collect() assert(predictionAndLabels.forall { case(p, l) => (math.round(p) - l) == 0 }) } test("Gradient of ANN") { - val eps = 1e-6 val accept = 1e-7 - val topologyArr = Array[Array[Int]]( Array[Int](1, 5, 1), Array[Int](5, 10, 5, 3), Array[Int](128, 256, 128) ) - val rnd = new XORShiftRandom(0) - var cnt = 0 while( cnt Date: Fri, 26 Sep 2014 17:32:13 +0400 Subject: [PATCH 057/143] Unit test parameter --- mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala index 35743a2fc6119..4525391775eb3 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala @@ -38,7 +38,7 @@ class ANNSuite extends FunSuite with LocalSparkContext { (Vectors.dense(features), Vectors.dense(Array(label)))} val rddData = sc.parallelize(data, 2) val hiddenLayersTopology = Array[Int](hiddenSize) - val model = ArtificialNeuralNetwork.train(rddData, hiddenLayersTopology, 500, 1e-5) + val model = ArtificialNeuralNetwork.train(rddData, hiddenLayersTopology, 2000, 1e-5) val predictionAndLabels = rddData.map { case(input, label) => (model.predict(input)(0), label(0)) }.collect() assert(predictionAndLabels.forall { case(p, l) => (math.round(p) - l) == 0 }) From e3dc0038b37322ba8164b0689a62099f63a92cc1 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Sun, 28 Sep 2014 14:16:01 +0800 Subject: [PATCH 058/143] Update ANNSuite.scala Make sure there are positive as well as negative weights in the gradient test. 
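XORShiftRandom.nextDouble() yields values in [0, 1), so the scaling below maps each initial weight onto the symmetric interval [-2.4, 2.4), the same ±2.4 range the original ParallelANN draft used for initialization. A minimal sketch of the mapping, assuming only Spark's XORShiftRandom:

    import org.apache.spark.util.random.XORShiftRandom
    val rnd = new XORShiftRandom(0)
    val w = rnd.nextDouble() * 4.8 - 2.4 // uniform over [-2.4, 2.4)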
--- mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala index 4525391775eb3..981c6ce87c33e 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala @@ -69,7 +69,7 @@ class ANNSuite extends FunSuite with LocalSparkContext { val arrWeights = new Array[Double](noWeights) var w = 0 while(w < noWeights) { - arrWeights(w) = rnd.nextDouble() + arrWeights(w) = rnd.nextDouble() * 4.8 - 2.4 w += 1 } val arrInp = new Array[Double](noInp) From dd47d75fab98501b70cb164d18b2d7eff3e2e2b5 Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Mon, 27 Oct 2014 17:44:34 -0700 Subject: [PATCH 059/143] ANN classifier draft --- .../mllib/classification/ANNClassifier.scala | 70 +++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala new file mode 100644 index 0000000000000..8e3780ef1eb7b --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala @@ -0,0 +1,70 @@ +package org.apache.spark.mllib.classification + +import org.apache.spark.mllib.ann.{ArtificialNeuralNetworkModel, ArtificialNeuralNetwork} +import org.apache.spark.mllib.linalg.Vector +import org.apache.spark.mllib.regression.LabeledPoint +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.linalg.Vectors +import breeze.linalg.{argmax => Bargmax} + +trait ANNClassifierHelper { + + protected val labelToIndex: Map[Double, Int] + private val indexToLabel = labelToIndex.map(_.swap) + private val labelCount = labelToIndex.size + + protected def labeledPointToVectorPair(labeledPoint: LabeledPoint) = { + val output = Array.fill(labelCount){0.0} + output(labelToIndex(labeledPoint.label)) = 1.0 + (labeledPoint.features, Vectors.dense(output)) + } + + protected def outputToLabel(output: Vector): Double = { + val index = Bargmax(output.toBreeze.toDenseVector) + indexToLabel(index) + } +} + +class ANNClassifierModel private[mllib](val annModel: ArtificialNeuralNetworkModel, val labelToIndex: Map[Double, Int]) + extends ClassificationModel with ANNClassifierHelper with Serializable { + /** + * Predict values for the given data set using the model trained. + * + * @param testData RDD representing data points to be predicted + * @return an RDD[Double] where each entry contains the corresponding prediction + */ + override def predict(testData: RDD[Vector]): RDD[Double] = testData.map(predict) + + /** + * Predict values for a single data point using the model trained. 
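+ * The underlying ANN has one output node per distinct label; the returned category
+ * is the label whose output node shows the highest activation (argmax).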
+ * + * @param testData array representing a single data point + * @return predicted category from the trained model + */ + override def predict(testData: Vector): Double = { + val output = annModel.predict(testData) + outputToLabel(output) + } +} + +class ANNClassifier private(val labelToIndex: Map[Double, Int]) extends ANNClassifierHelper { + + def run(data: RDD[LabeledPoint]): ANNClassifierModel = { + val featureCount = data.first().features.size + val hiddenSize = featureCount / 2 + 1 + val numSteps = 2000 + val hiddenLayersTopology = Array[Int](hiddenSize) + val annData = data.map(lp => labeledPointToVectorPair(lp)) + /* train the model */ + val model = ArtificialNeuralNetwork.train(annData, hiddenLayersTopology, numSteps, 1e-5) + new ANNClassifierModel(model, labelToIndex) + } +} + +object ANNClassifier { + + def train(data: RDD[LabeledPoint]): ANNClassifierModel = { + val labelToIndex = data.map( lp => lp.label).distinct().collect().zipWithIndex.toMap + new ANNClassifier(labelToIndex).run(data) + } +} From 3e7eca196360de447a0805a5b6c0a7368eededb6 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Tue, 28 Oct 2014 19:27:15 +0800 Subject: [PATCH 060/143] Update ArtificialNeuralNetwork.scala Add support for customised initial weights --- .../mllib/ann/ArtificialNeuralNetwork.scala | 85 +++++++++++++++++-- 1 file changed, 77 insertions(+), 8 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index 2d6110e2b1fc7..3ea5ca51eb968 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -36,7 +36,7 @@ import org.apache.spark.util.random.XORShiftRandom * * For a network of H hidden layers: * - * hiddenLayersTopology(h) indicates the number of nodes in hidden layer h, excluding the bias + * hiddenLayersTopology(h) indicates the number of nodes in hidden layer h, excluding the bias * node. h counts from 0 (first hidden layer, taking inputs from input layer) to H - 1 (last * hidden layer, sending outputs to the output layer). * @@ -76,7 +76,7 @@ import org.apache.spark.util.random.XORShiftRandom /** * Artificial neural network (ANN) model - * + * * @param weights the weights between the neurons in the ANN. * @param topology array containing the number of nodes per layer in the network, including * the nodes in the input and output layer, but excluding the bias nodes. @@ -171,7 +171,7 @@ object ArtificialNeuralNetwork { trainingRDD: RDD[(Vector, Vector)], hiddenLayersTopology: Array[Int], maxNumIterations: Int): ArtificialNeuralNetworkModel = { - train( trainingRDD, hiddenLayersTopology, maxNumIterations, defaultTolerance) + train(trainingRDD, hiddenLayersTopology, maxNumIterations, defaultTolerance) } /** @@ -190,6 +190,23 @@ object ArtificialNeuralNetwork { train(trainingRDD, model, maxNumIterations, defaultTolerance) } + /** + * Trains an ANN with given initial weights. + * Uses default convergence tolerance 1e-4 for LBFGS. + * + * @param trainingRDD RDD containing (input, output) pairs for training. + * @param initialWeights initial weights vector. + * @param maxNumIterations maximum number of training iterations. + * @return ANN model. 
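+ * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes.
+ *
+ * A minimal usage sketch (layer size and seed illustrative):
+ * {{{
+ * val w0 = ArtificialNeuralNetwork.getRandomWeights(trainingRDD, Array(5), 1234)
+ * val model = ArtificialNeuralNetwork.train(trainingRDD, Array(5), w0, 1000)
+ * }}}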
+ */ + def train( + trainingRDD: RDD[(Vector,Vector)], + hiddenLayersTopology: Array[Int], + initialWeights: Vector, + maxNumIterations: Int): ArtificialNeuralNetworkModel = { + train(trainingRDD, hiddenLayersTopology, initialWeights, maxNumIterations, defaultTolerance) + } + /** * Trains an ANN using customized convergence tolerance. * @@ -204,7 +221,7 @@ object ArtificialNeuralNetwork { model: ArtificialNeuralNetworkModel, maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel = { - new ArtificialNeuralNetwork( model.topology, maxNumIterations, convergenceTol ). + new ArtificialNeuralNetwork(model.topology, maxNumIterations, convergenceTol). run(trainingRDD, model.weights) } @@ -224,17 +241,69 @@ object ArtificialNeuralNetwork { convergenceTol: Double): ArtificialNeuralNetworkModel = { val topology = convertTopology(trainingRDD, hiddenLayersTopology) new ArtificialNeuralNetwork(topology, maxNumIterations, convergenceTol). - run(trainingRDD, randomWeights(topology)) + run(trainingRDD, randomWeights(topology, false)) + } + + /** + * Trains an ANN with given initial weights. + * + * @param trainingRDD RDD containing (input, output) pairs for training. + * @param initialWeights initial weights vector. + * @param maxNumIterations maximum number of training iterations. + * @param convergenceTol convergence tolerance for LBFGS. Smaller value for closer convergence. + * @return ANN model. + */ + def train( + trainingRDD: RDD[(Vector,Vector)], + hiddenLayersTopology: Array[Int], + initialWeights: Vector, + maxNumIterations: Int, + convergenceTol: Double): ArtificialNeuralNetworkModel = { + val topology = convertTopology(trainingRDD, hiddenLayersTopology) + new ArtificialNeuralNetwork(topology, maxNumIterations, convergenceTol). + run(trainingRDD, initialWeights) + } + + /** + * Provides a random weights vector. + * + * @param trainingRDD RDD containing (input, output) pairs for training. + * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes. + * @return random weights vector. + */ + def getRandomWeights( + trainingRDD: RDD[(Vector,Vector)], + hiddenLayersTopology: Array[Int]): Vector = { + val topology = convertTopology(trainingRDD, hiddenLayersTopology) + return randomWeights(topology, false) + } + + /** + * Provides a random weights vector, using given random seed. + * + * @param trainingRDD RDD containing (input, output) pairs for later training. + * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes. + * @param seed random generator seed. + * @return random weights vector. 
+ */ + def getRandomWeights( + trainingRDD: RDD[(Vector,Vector)], + hiddenLayersTopology: Array[Int], + seed: Int): Vector = { + val topology = convertTopology(trainingRDD, hiddenLayersTopology) + return randomWeights(topology, true, seed) } - private def convertTopology( input: RDD[(Vector,Vector)], + private def convertTopology( + input: RDD[(Vector,Vector)], hiddenLayersTopology: Array[Int] ): Array[Int] = { val firstElt = input.first firstElt._1.size +: hiddenLayersTopology :+ firstElt._2.size } - private def randomWeights(topology: Array[Int]): Vector = { - val rand = new XORShiftRandom() + private def randomWeights(topology: Array[Int], useSeed: Boolean, seed: Int = 0): Vector = { + val rand: XORShiftRandom = + if( useSeed == false ) new XORShiftRandom() else new XORShiftRandom(seed) var i: Int = 0 var l: Int = 0 val noWeights = { From f8d5a05ab18c91274e53a5ed4c40769177a1301a Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Tue, 28 Oct 2014 19:28:33 +0800 Subject: [PATCH 061/143] Update ANNSuite.scala Uses initial weights generated from fixed random seed for XOR test. --- .../test/scala/org/apache/spark/mllib/ann/ANNSuite.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala index 981c6ce87c33e..69445ef6471c5 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala @@ -33,12 +33,12 @@ class ANNSuite extends FunSuite with LocalSparkContext { Array[Double](1,1) ) val outputs = Array[Double](0, 1, 1, 0) - val hiddenSize = 5 val data = inputs.zip(outputs).map { case(features, label) => (Vectors.dense(features), Vectors.dense(Array(label)))} val rddData = sc.parallelize(data, 2) - val hiddenLayersTopology = Array[Int](hiddenSize) - val model = ArtificialNeuralNetwork.train(rddData, hiddenLayersTopology, 2000, 1e-5) + val hiddenLayersTopology = Array[Int](5) + val initialWeights = ArtificialNeuralNetwork.getRandomWeights(rddData, hiddenLayersTopology, 0x01234567) + val model = ArtificialNeuralNetwork.train(rddData, hiddenLayersTopology, initialWeights, 200) val predictionAndLabels = rddData.map { case(input, label) => (model.predict(input)(0), label(0)) }.collect() assert(predictionAndLabels.forall { case(p, l) => (math.round(p) - l) == 0 }) @@ -107,7 +107,7 @@ class ANNSuite extends FunSuite with LocalSparkContext { val dEdW = ( E2 - E1 ) / eps val gradw = gradient(w) val err = dEdW - gradw - assert(math.abs(err) < accept, + assert(math.abs(err) < accept, s"Difference between calculated and approximated gradient too large ($dEdW - $gradw = $err)" ) arrTmpWeights(w) = arrTmpWeights(w) - eps From 57b9147f7bc0eca1fb8a42601a87f5e11b151b7e Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Tue, 28 Oct 2014 11:29:41 -0700 Subject: [PATCH 062/143] XOR classification test with draft --- .../mllib/classification/ANNClassifier.scala | 26 +++++++++++++------ .../classification/ANNClassifierSuite.scala | 26 +++++++++++++++++++ 2 files changed, 44 insertions(+), 8 deletions(-) create mode 100644 mllib/src/test/scala/org/apache/spark/mllib/classification/ANNClassifierSuite.scala diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala index 8e3780ef1eb7b..59efcee31d4df 100644 --- 
a/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala @@ -47,24 +47,34 @@ class ANNClassifierModel private[mllib](val annModel: ArtificialNeuralNetworkMod } } -class ANNClassifier private(val labelToIndex: Map[Double, Int]) extends ANNClassifierHelper { +class ANNClassifier private(val labelToIndex: Map[Double, Int], + private val hiddenLayersTopology: Array[Int], + private val maxIterations: Int, + private val stepSize: Double, + private val convergeTol: Double) + extends ANNClassifierHelper with Serializable { def run(data: RDD[LabeledPoint]): ANNClassifierModel = { - val featureCount = data.first().features.size - val hiddenSize = featureCount / 2 + 1 - val numSteps = 2000 - val hiddenLayersTopology = Array[Int](hiddenSize) val annData = data.map(lp => labeledPointToVectorPair(lp)) /* train the model */ - val model = ArtificialNeuralNetwork.train(annData, hiddenLayersTopology, numSteps, 1e-5) + val model = ArtificialNeuralNetwork.train(annData, hiddenLayersTopology, maxIterations, convergeTol) new ANNClassifierModel(model, labelToIndex) } } object ANNClassifier { - def train(data: RDD[LabeledPoint]): ANNClassifierModel = { + def train(data: RDD[LabeledPoint], hiddenLayersTopology: Array[Int], maxIterations: Int, + stepSize: Double, convergenceTol: Double): ANNClassifierModel = { val labelToIndex = data.map( lp => lp.label).distinct().collect().zipWithIndex.toMap - new ANNClassifier(labelToIndex).run(data) + new ANNClassifier(labelToIndex, hiddenLayersTopology, maxIterations, stepSize, convergenceTol).run(data) } + + def train(data: RDD[LabeledPoint]): ANNClassifierModel = { + val featureCount = data.first().features.size + val hiddenSize = featureCount / 2 + 1 + val hiddenLayersTopology = Array[Int](hiddenSize) + train(data, hiddenLayersTopology, 2000, 1.0, 1e-4) + } + } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/ANNClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/ANNClassifierSuite.scala new file mode 100644 index 0000000000000..2569d5688036a --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/ANNClassifierSuite.scala @@ -0,0 +1,26 @@ +package org.apache.spark.mllib.classification + +import org.apache.spark.mllib.linalg.Vectors +import org.apache.spark.mllib.regression.LabeledPoint +import org.apache.spark.mllib.util.LocalSparkContext +import org.scalatest.FunSuite + +class ANNClassifierSuite extends FunSuite with LocalSparkContext { + + test("ANN classifier test"){ + val inputs = Array[Array[Double]]( + Array[Double](0,0), + Array[Double](0,1), + Array[Double](1,0), + Array[Double](1,1) + ) + val outputs = Array[Double](0, 1, 1, 0) + val data = inputs.zip(outputs).map{ case(input, output) => + new LabeledPoint(output, Vectors.dense(input))} + val rddData = sc.parallelize(data, 2) + val model = ANNClassifier.train(rddData) + val predictionAndLabels = rddData.map(lp => (model.predict(lp.features), lp.label)) + predictionAndLabels.foreach(println) + } + +} From c189bb260860b7f193650c3fe9bf12df9c8591b9 Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Tue, 28 Oct 2014 15:22:50 -0700 Subject: [PATCH 063/143] ANN classifier refactoring in progress: need random weight function --- .../mllib/classification/ANNClassifier.scala | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala 
b/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala index 59efcee31d4df..90af89f2681d5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala @@ -49,6 +49,7 @@ class ANNClassifierModel private[mllib](val annModel: ArtificialNeuralNetworkMod class ANNClassifier private(val labelToIndex: Map[Double, Int], private val hiddenLayersTopology: Array[Int], + private val initialWeights: Vector, private val maxIterations: Int, private val stepSize: Double, private val convergeTol: Double) @@ -57,17 +58,25 @@ class ANNClassifier private(val labelToIndex: Map[Double, Int], def run(data: RDD[LabeledPoint]): ANNClassifierModel = { val annData = data.map(lp => labeledPointToVectorPair(lp)) /* train the model */ - val model = ArtificialNeuralNetwork.train(annData, hiddenLayersTopology, maxIterations, convergeTol) + val model = ArtificialNeuralNetwork.train(annData, hiddenLayersTopology/*, initialWeights*/, maxIterations, convergeTol) new ANNClassifierModel(model, labelToIndex) } } object ANNClassifier { - def train(data: RDD[LabeledPoint], hiddenLayersTopology: Array[Int], maxIterations: Int, + def train(data: RDD[LabeledPoint], hiddenLayersTopology: Array[Int], + initialWeights: Vector, maxIterations: Int, stepSize: Double, convergenceTol: Double): ANNClassifierModel = { val labelToIndex = data.map( lp => lp.label).distinct().collect().zipWithIndex.toMap - new ANNClassifier(labelToIndex, hiddenLayersTopology, maxIterations, stepSize, convergenceTol).run(data) + new ANNClassifier(labelToIndex, hiddenLayersTopology, + initialWeights, maxIterations, stepSize, convergenceTol).run(data) + } + + def train(data: RDD[LabeledPoint], hiddenLayersTopology: Array[Int], maxIterations: Int, + stepSize: Double, convergenceTol: Double): ANNClassifierModel = { + val initialWeights = randomWeights(data, hiddenLayersTopology) + train(data, hiddenLayersTopology, initialWeights, maxIterations, stepSize, convergenceTol) } def train(data: RDD[LabeledPoint]): ANNClassifierModel = { @@ -77,4 +86,10 @@ object ANNClassifier { train(data, hiddenLayersTopology, 2000, 1.0, 1e-4) } + /* TODO: remove duplication - the same analysis will be done in ANNClassifier.run() */ + def randomWeights(data: RDD[LabeledPoint], hiddenLayersTopology: Array[Int]) = { + val labelCount = data.map( lp => lp.label).distinct().collect().length + val featureCount = data.first().features.size + null + } } From c4baf798e671184f09a160d318efdb92ae41a800 Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Tue, 28 Oct 2014 15:34:30 -0700 Subject: [PATCH 064/143] Minor stylefix, add additional function for customized initial weights --- .../mllib/ann/ArtificialNeuralNetwork.scala | 22 +++++++++++++++++-- .../org/apache/spark/mllib/ann/ANNSuite.scala | 2 +- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index 3ea5ca51eb968..392f6e458523f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -271,7 +271,7 @@ object ArtificialNeuralNetwork { * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes. * @return random weights vector. 
*/ - def getRandomWeights( + def randomWeights( trainingRDD: RDD[(Vector,Vector)], hiddenLayersTopology: Array[Int]): Vector = { val topology = convertTopology(trainingRDD, hiddenLayersTopology) @@ -286,7 +286,7 @@ object ArtificialNeuralNetwork { * @param seed random generator seed. * @return random weights vector. */ - def getRandomWeights( + def randomWeights( trainingRDD: RDD[(Vector,Vector)], hiddenLayersTopology: Array[Int], seed: Int): Vector = { @@ -294,6 +294,24 @@ object ArtificialNeuralNetwork { return randomWeights(topology, true, seed) } + /** + * Provides a random weights vector, using given random seed. + * + * @param inputLayerSize size of input layer. + * @param outputLayerSize size of output layer. + * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes. + * @param seed random generator seed. + * @return random weights vector. + */ + def randomWeights( + inputLayerSize: Int, + outputLayerSize: Int, + hiddenLayersTopology: Array[Int], + seed: Int): Vector = { + val topology = inputLayerSize +: hiddenLayersTopology :+ outputLayerSize + return randomWeights(topology, true, seed) + } + private def convertTopology( input: RDD[(Vector,Vector)], hiddenLayersTopology: Array[Int] ): Array[Int] = { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala index 69445ef6471c5..d95846d97c3b7 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala @@ -37,7 +37,7 @@ class ANNSuite extends FunSuite with LocalSparkContext { (Vectors.dense(features), Vectors.dense(Array(label)))} val rddData = sc.parallelize(data, 2) val hiddenLayersTopology = Array[Int](5) - val initialWeights = ArtificialNeuralNetwork.getRandomWeights(rddData, hiddenLayersTopology, 0x01234567) + val initialWeights = ArtificialNeuralNetwork.randomWeights(rddData, hiddenLayersTopology, 0x01234567) val model = ArtificialNeuralNetwork.train(rddData, hiddenLayersTopology, initialWeights, 200) val predictionAndLabels = rddData.map { case(input, label) => (model.predict(input)(0), label(0)) }.collect() From d0836ed696b23f7cb3b5550cd67de123d1222c29 Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Fri, 31 Oct 2014 11:02:56 -0700 Subject: [PATCH 065/143] Model as a parameters for classifier --- .../mllib/classification/ANNClassifier.scala | 40 +++++++++++++++++-- .../classification/ANNClassifierSuite.scala | 30 +++++++++++--- 2 files changed, 61 insertions(+), 9 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala index 90af89f2681d5..451c93a865182 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.spark.mllib.classification import org.apache.spark.mllib.ann.{ArtificialNeuralNetworkModel, ArtificialNeuralNetwork} @@ -7,6 +24,8 @@ import org.apache.spark.rdd.RDD import org.apache.spark.mllib.linalg.Vectors import breeze.linalg.{argmax => Bargmax} +import scala.util.Random + trait ANNClassifierHelper { protected val labelToIndex: Map[Double, Int] @@ -25,7 +44,8 @@ trait ANNClassifierHelper { } } -class ANNClassifierModel private[mllib](val annModel: ArtificialNeuralNetworkModel, val labelToIndex: Map[Double, Int]) +class ANNClassifierModel private[mllib](val annModel: ArtificialNeuralNetworkModel, + val labelToIndex: Map[Double, Int]) extends ClassificationModel with ANNClassifierHelper with Serializable { /** * Predict values for the given data set using the model trained. @@ -58,7 +78,8 @@ class ANNClassifier private(val labelToIndex: Map[Double, Int], def run(data: RDD[LabeledPoint]): ANNClassifierModel = { val annData = data.map(lp => labeledPointToVectorPair(lp)) /* train the model */ - val model = ArtificialNeuralNetwork.train(annData, hiddenLayersTopology/*, initialWeights*/, maxIterations, convergeTol) + val model = ArtificialNeuralNetwork.train(annData, hiddenLayersTopology, + initialWeights, maxIterations, convergeTol) new ANNClassifierModel(model, labelToIndex) } } @@ -79,6 +100,12 @@ object ANNClassifier { train(data, hiddenLayersTopology, initialWeights, maxIterations, stepSize, convergenceTol) } + def train(data: RDD[LabeledPoint], model: ANNClassifierModel, maxIterations: Int, + stepSize: Double, convergenceTol: Double): ANNClassifierModel = { + val hiddenLayersTopology = model.annModel.topology.slice(1, model.annModel.topology.length - 1) + train(data, hiddenLayersTopology, model.annModel.weights, maxIterations, stepSize, convergenceTol) + } + def train(data: RDD[LabeledPoint]): ANNClassifierModel = { val featureCount = data.first().features.size val hiddenSize = featureCount / 2 + 1 @@ -87,9 +114,14 @@ object ANNClassifier { } /* TODO: remove duplication - the same analysis will be done in ANNClassifier.run() */ - def randomWeights(data: RDD[LabeledPoint], hiddenLayersTopology: Array[Int]) = { + def randomWeights(data: RDD[LabeledPoint], + hiddenLayersTopology: Array[Int], seed: Int): Vector = { val labelCount = data.map( lp => lp.label).distinct().collect().length val featureCount = data.first().features.size - null + ArtificialNeuralNetwork.randomWeights(featureCount, labelCount, hiddenLayersTopology, seed) + } + + def randomWeights(data: RDD[LabeledPoint], hiddenLayersTopology: Array[Int]): Vector = { + randomWeights(data, hiddenLayersTopology, Random.nextInt()) } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/ANNClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/ANNClassifierSuite.scala index 2569d5688036a..3eed26fd72531 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/classification/ANNClassifierSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/ANNClassifierSuite.scala @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software 
Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + package org.apache.spark.mllib.classification import org.apache.spark.mllib.linalg.Vectors @@ -7,7 +24,7 @@ import org.scalatest.FunSuite class ANNClassifierSuite extends FunSuite with LocalSparkContext { - test("ANN classifier test"){ + test("ANN classifier test for XOR"){ val inputs = Array[Array[Double]]( Array[Double](0,0), Array[Double](0,1), @@ -18,9 +35,12 @@ class ANNClassifierSuite extends FunSuite with LocalSparkContext { val data = inputs.zip(outputs).map{ case(input, output) => new LabeledPoint(output, Vectors.dense(input))} val rddData = sc.parallelize(data, 2) - val model = ANNClassifier.train(rddData) - val predictionAndLabels = rddData.map(lp => (model.predict(lp.features), lp.label)) - predictionAndLabels.foreach(println) + val hiddenLayerTopology = Array[Int]{5} + val initialWeights = ANNClassifier.randomWeights(rddData, hiddenLayerTopology, 0x01234567) + val model = ANNClassifier.train(rddData, hiddenLayerTopology, initialWeights, 200, 1.0, 1e-4) + val predictionAndLabels = rddData.map(lp => + (model.predict(lp.features), lp.label)).collect() + assert(predictionAndLabels.forall { case(p, l) => + (p - l) == 0 }) } - } From 01bbca01058aaced98c15f2cdd409bbc0a27c6af Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Tue, 4 Nov 2014 16:13:30 -0800 Subject: [PATCH 066/143] Scala style fix --- .../apache/spark/mllib/classification/ANNClassifier.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala index 451c93a865182..670038f72957c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala @@ -102,8 +102,10 @@ object ANNClassifier { def train(data: RDD[LabeledPoint], model: ANNClassifierModel, maxIterations: Int, stepSize: Double, convergenceTol: Double): ANNClassifierModel = { - val hiddenLayersTopology = model.annModel.topology.slice(1, model.annModel.topology.length - 1) - train(data, hiddenLayersTopology, model.annModel.weights, maxIterations, stepSize, convergenceTol) + val hiddenLayersTopology = + model.annModel.topology.slice(1, model.annModel.topology.length - 1) + train(data, hiddenLayersTopology, model.annModel.weights, + maxIterations, stepSize, convergenceTol) } def train(data: RDD[LabeledPoint]): ANNClassifierModel = { From c7e532313985cd10b892e290a466c294661601d5 Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Fri, 5 Dec 2014 16:46:59 -0800 Subject: [PATCH 067/143] Encoding of output with 0.1 and 0.9 by bgreeven suggestion --- .../org/apache/spark/mllib/classification/ANNClassifier.scala | 4 ++-- 1 
file changed, 2 insertions(+), 2 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala index 670038f72957c..c12e2c1fd3455 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala @@ -33,8 +33,8 @@ trait ANNClassifierHelper { private val labelCount = labelToIndex.size protected def labeledPointToVectorPair(labeledPoint: LabeledPoint) = { - val output = Array.fill(labelCount){0.0} - output(labelToIndex(labeledPoint.label)) = 1.0 + val output = Array.fill(labelCount){0.1} + output(labelToIndex(labeledPoint.label)) = 0.9 (labeledPoint.features, Vectors.dense(output)) } From 90f5ae56e3b829b4ddf389253a822e854e4a00da Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Wed, 10 Dec 2014 14:15:03 -0800 Subject: [PATCH 068/143] Addressing bgreeven comment regarding labels sort, annotations --- .../mllib/classification/ANNClassifier.scala | 71 +++++++++++++++++-- 1 file changed, 67 insertions(+), 4 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala index c12e2c1fd3455..3a0537bf91888 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala @@ -84,30 +84,74 @@ class ANNClassifier private(val labelToIndex: Map[Double, Int], } } +/** + * Top level methods for training the classifier based on artificial neural network (ANN) + */ object ANNClassifier { + /** + * Trains an ANN classifier. + * + * @param data RDD containing labeled points for training. + * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes. + * @param initialWeights initial weights of underlying artificial neural network + * @param maxIterations specifies maximum number of training iterations. + * @param stepSize step size (not implemented) + * @param convergenceTol convergence tolerance for LBFGS + * @return ANN model. + */ def train(data: RDD[LabeledPoint], hiddenLayersTopology: Array[Int], initialWeights: Vector, maxIterations: Int, stepSize: Double, convergenceTol: Double): ANNClassifierModel = { - val labelToIndex = data.map( lp => lp.label).distinct().collect().zipWithIndex.toMap + val labelToIndex = data.map( lp => lp.label).distinct().collect().sorted.zipWithIndex.toMap new ANNClassifier(labelToIndex, hiddenLayersTopology, initialWeights, maxIterations, stepSize, convergenceTol).run(data) } + /** + * Trains an ANN classifier. + * + * @param data RDD containing labeled points for training. + * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes. + * @param maxIterations specifies maximum number of training iterations. + * @param stepSize step size (not implemented) + * @param convergenceTol convergence tolerance for LBFGS + * @return ANN classifier model. + */ def train(data: RDD[LabeledPoint], hiddenLayersTopology: Array[Int], maxIterations: Int, stepSize: Double, convergenceTol: Double): ANNClassifierModel = { val initialWeights = randomWeights(data, hiddenLayersTopology) train(data, hiddenLayersTopology, initialWeights, maxIterations, stepSize, convergenceTol) } + /** + * Trains an already pre-trained ANN classifier. 
+ * Assumes that the data has the same labels as the + * data that were used for the initial training, or at least a + * subset of those labels. + * + * @param data RDD containing labeled points for training. + * @param model a pre-trained ANN classifier model. + * @param maxIterations specifies maximum number of training iterations. + * @param stepSize step size (not implemented). + * @param convergenceTol convergence tolerance for LBFGS. + * @return ANN classifier model. + */ def train(data: RDD[LabeledPoint], model: ANNClassifierModel, maxIterations: Int, stepSize: Double, convergenceTol: Double): ANNClassifierModel = { val hiddenLayersTopology = model.annModel.topology.slice(1, model.annModel.topology.length - 1) - train(data, hiddenLayersTopology, model.annModel.weights, - maxIterations, stepSize, convergenceTol) + new ANNClassifier(model.labelToIndex, hiddenLayersTopology, + model.annModel.weights, maxIterations, stepSize, convergenceTol).run(data) } + /** + * Trains an ANN classifier with one hidden layer of size (featureCount / 2 + 1), + * with 2000 steps of size 1.0 and tolerance 1e-4. + * + * @param data RDD containing labeled points for training. + * @return ANN classifier model. + */ def train(data: RDD[LabeledPoint]): ANNClassifierModel = { val featureCount = data.first().features.size val hiddenSize = featureCount / 2 + 1 - val hiddenLayersTopology = Array[Int](hiddenSize) train(data, hiddenLayersTopology, 2000, 1.0, 1e-4) } - /* TODO: remove duplication - the same analysis will be done in ANNClassifier.run() */ + /** + * Returns random weights for the ANN classifier with the given hidden layers + * and data dimensionality, i.e. the weights for the following topology: + * [numFeatures -: hiddenLayers :- numLabels] + * + * @param data RDD containing labeled points for training. + * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes. + * @param seed random generator seed. + * @return vector with random weights. + */ def randomWeights(data: RDD[LabeledPoint], + hiddenLayersTopology: Array[Int], seed: Int): Vector = { + /* TODO: remove duplication - the same analysis will be done in ANNClassifier.run() */ val labelCount = data.map( lp => lp.label).distinct().collect().length val featureCount = data.first().features.size ArtificialNeuralNetwork.randomWeights(featureCount, labelCount, hiddenLayersTopology, seed) } + /** + * Returns random weights for the ANN classifier with the given hidden layers + * and data dimensionality, i.e. the weights for the following topology: + * [numFeatures -: hiddenLayers :- numLabels] + * + * @param data RDD containing labeled points for training. + * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes. + * @return vector with random weights.
+ */ def randomWeights(data: RDD[LabeledPoint], hiddenLayersTopology: Array[Int]): Vector = { randomWeights(data, hiddenLayersTopology, Random.nextInt()) } From 243e6679b362e9bc5be104f8b27120df1c4dd14c Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 3 Jul 2014 11:33:22 +0800 Subject: [PATCH 069/143] Create ParallelANN.scala This is the main ParallelANN class and associated Model --- .../apache/spark/mllib/ann/ParallelANN.scala | 391 ++++++++++++++++++ 1 file changed, 391 insertions(+) create mode 100644 mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala new file mode 100644 index 0000000000000..f0668d77895d2 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala @@ -0,0 +1,391 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.ann + +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.linalg.Vector +import org.apache.spark.mllib.optimization._ +import org.apache.spark.mllib.linalg.Vectors +import breeze.linalg.DenseVector +import org.apache.spark.mllib.linalg.Vectors +import org.apache.spark.rdd.RDD +import breeze.linalg.{axpy => brzAxpy, Vector => BV} +import breeze.linalg.{Vector => BV} +import breeze.linalg.{axpy => brzAxpy} +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.mllib.regression.RegressionModel + +/* + * Implements a Artificial Neural Network (ANN) + * + * format of data: + * data[ 0..noInput-1 ]: Input + * data[ noInput..noInput+noOutput-1 ]: Output + * + */ + +trait ANN { + + def noInput: Integer + def noHidden: Integer + def noOutput: Integer + def beta: Double + + def g( x: Double ) = (1/(1+math.exp(-beta*x))) + def dg( x: Double ) = beta*g(x)*(1-g(x)) + + + /* returns the hidden layer including the -1 robonode! 
*/ + def computeHidden( data: Vector, weights: Vector ): Vector = { + + val brzData = data.toBreeze + val brzInp = DenseVector.vertcat( brzData( 0 to noInput-1 ).toDenseVector, DenseVector[Double](-1.0) ) + val brzWeights = weights.toBreeze + var hidden = DenseVector.zeros[Double]( noHidden+1 ) + + for( j <- 0 to noHidden-1 ) { + + val weightsSubset = brzWeights( j*(noInput+1) to j*(noInput+1)+(noInput+1)-1 ).toVector + hidden( j ) = g( weightsSubset.dot( brzInp ) ) + + } + + hidden( noHidden ) = -1.0 + + Vectors.fromBreeze( hidden ) + + } + + /* returns the hidden layer including the -1 robonode, as well as the final estimation */ + def computeValues( data: Vector, weights: Vector ): (Vector, Vector) = { + + var hidden = computeHidden( data, weights ) + var output = new Array[Double](noOutput) + + for( k<-0 to noOutput-1 ) { + val brzWeights = weights.toBreeze + var weightsSubset = brzWeights( noHidden*(noInput+1)+k*(noHidden+1) to noHidden*(noInput+1)+(k+1)*(noHidden+1)-1).toVector + output(k) = g( weightsSubset.dot( hidden.toBreeze ) ) + } + + ( hidden, Vectors.dense( output ) ) + + } + +} + +class ParallelANNModel private[mllib] +( + override val weights: Vector, + val noInp: Integer, + val noHid: Integer, + val noOut: Integer, + val b: Double ) + extends GeneralizedSteepestDescendModel(weights) with RegressionModel with Serializable with ANN { + + val noInput = noInp + val noHidden = noHid + val noOutput = noOut + val beta = b + + override def predictPoint( data: Vector, weights: Vector ): Double = { + val outp = computeValues( data, weights )._2 + outp.toArray(0) + } + + def predictPointV( data: Vector, weights: Vector): Vector = { + computeValues( data, weights )._2 + } + +} + +/** + * Train a linear regression model with no regularization using Stochastic Gradient Descent. + * This solves the least squares regression formulation + * f(weights) = 1/n ||A weights-y||^2 + * (which is the mean squared error). + * Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with + * its corresponding right hand side label y. + * See also the documentation for the precise formulation. + */ +class ParallelANNWithSGD private ( + private var stepSize: Double, + private var numIterations: Int, + private var miniBatchFraction: Double, + private var noInput: Int, + private var noHidden: Int, + private var noOutput: Int, + private val beta: Double ) + extends GeneralizedSteepestDescendAlgorithm[ParallelANNModel] with Serializable { + + private val gradient = new LeastSquaresGradientANN( noInput, noHidden, noOutput, beta ) + private val updater = new ANNUpdater() + override val optimizer = new GradientDescent(gradient, updater) + .setStepSize(stepSize) + .setNumIterations(numIterations) + .setMiniBatchFraction(miniBatchFraction) + + /** + * Construct a LinearRegression object with default parameters: {stepSize: 1.0, + * numIterations: 100, miniBatchFraction: 1.0}. 
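+ * Also sets noInput: 1, noHidden: 5, noOutput: 1 and beta: 1.0.
+ *
+ * A minimal usage sketch (assuming an RDD `trainingData` of (input, output)
+ * vector pairs with one input and one output dimension, outputs scaled to
+ * [0, 1]; the name `trainingData` is illustrative only):
+ * {{{
+ *   val ann = new ParallelANNWithSGD( 1, 5 )
+ *   val model = ann.train( trainingData )
+ *   val prediction = model.predictV( Vectors.dense( 0.5 ) )
+ * }}}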
+ */ + def this() = { + this( 1.0, 100, 1.0, 1, 5, 1, 1.0 ) + } + + def this( noHidden: Int ) = { + this( 1.0, 100, 1.0, 1, noHidden, 1, 1.0 ) + } + + def this( noInput: Int, noHidden: Int ) = { + this( 1.0, 100, 1.0, noInput, noHidden, 1, 1.0 ) + } + + def this( noInput: Int, noHidden: Int, noOutput: Int ) = { + this( 1.0, 100, 1.0, noInput, noHidden, noOutput, 1.0 ) + } + + override protected def createModel(weights: Vector) = { + new ParallelANNModel( weights, noInput, noHidden, noOutput, beta ) + } + + def checkOutput( rdd: RDD[(Vector,Vector)] ) { + val oVals = rdd.flatMap( T => T._2.toArray ) + var omax = oVals.max + assert( omax <= 1 ) + var omin = oVals.min + assert( omin >= 0 ) + } + + def randomDouble( i: Int ): Double = { + (((i+5)*59049+(i+5)*78125)%65536).toDouble/65536 + } + + def train( rdd: RDD[(Vector,Vector)] ): ParallelANNModel = { + + val ft = rdd.first() + + assert( noInput == ft._1.size ) + assert( noOutput == ft._2.size ) + + checkOutput( rdd ) + + val noWeights = (noInput+1)*noHidden + (noHidden+1)*noOutput + + val initialWeightsArr = new Array[Double](noWeights) + + for( i <- 0 to (noInput+1)*noHidden-1 ) + initialWeightsArr( i ) = (randomDouble(i)*4.8-2.4)/(noInput+1) + for( i <- 0 to (noHidden+1)*noOutput-1 ) + initialWeightsArr( (noInput+1)*noHidden+i ) = (randomDouble(i)*4.8-2.4)/(noHidden+1) + + val initialWeights = Vectors.dense( initialWeightsArr ) + + println( "Parameters:" ) + println( " noInput: "+noInput ) + println( " noHidden: "+noHidden ) + println( " noOutput: "+noOutput ) + println( " noWeights: "+noWeights ) + + run( rdd, initialWeights ) + + } + + def train( rdd: RDD[(Vector,Vector)], model: ParallelANNModel ): ParallelANNModel = { + run( rdd, model.weights ) + } + +} + +/** + * data consists of input vector and output vector, and has the following form: + * + * [ ---input--- ---output---] + * + * where input = data( 0 to noInput-1 ) and output = data( noInput to noInput+noOutput-1 ) + * + * V_ij is the weight from input node i to hidden node j + * W_jk is the weight from hidden node j to output node k + * + * The weights have the following mapping: + * + * V_ij goes to position i + j*(noInput+1) + * W_jk goes to position (noInput+1)*noHidden + j + k*(noHidden+1) + * + * Gradient has same mapping, i.e. + * dE/dVij goes to i + j*(noInput+1) + * dE/dWjk goes to (noInput+1)*noHidden + j +k*(noHidden+1) + * + * Where E = ((estOutput-output),(estOutput-output)), + * the inner product of the difference between estimation and target output with itself. 
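+ *
+ * For example, with noInput = 2 and noHidden = 3: V_{1,2} is stored at
+ * index 1 + 2*(2+1) = 7, and W_{2,1} at (2+1)*3 + 2 + 1*(3+1) = 15.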
+ */ + +class LeastSquaresGradientANN( noInp: Integer, noHid: Integer, noOut: Integer, b: Double ) extends Gradient with ANN { + + val noInput = noInp + val noHidden = noHid + val noOutput = noOut + val beta = b + + override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { + + val brzData = data.toBreeze + val brzInp = DenseVector.vertcat( brzData( 0 to noInput-1 ).toDenseVector, DenseVector[Double](-1.0) ) + + val brzOut = brzData( noInput.toInt to noInput+noOutput-1 ).toVector + val brzWeights = weights.toBreeze + val gradient = DenseVector.zeros[Double]( (noInput+1)*noHidden+(noHidden+1)*noOutput ) + + + val (hidden, output) = computeValues( data, weights ) + var brzHidden = hidden.toBreeze /* already includes the robonode */ + val brzEst = output.toBreeze + val diff = brzEst :- brzOut + val E = diff.dot(diff) + + /* + * The following three fields are for verification only + val eps = .000001 + val noInpCheck = 0 + val noOutCheck = 0 + */ + + var brzWeights_tmp = weights.toBreeze + + /* Wjk */ + for( j <-0 to noHidden ) { + + for( k <-0 to noOutput-1 ) { + + val brzW = brzWeights( noHidden*(noInput+1)+k*(noHidden+1) to noHidden*(noInput+1)+(k+1)*(noHidden+1)-1 ).toVector + var sum_l = brzHidden.dot( brzW ) + gradient( noHidden*(noInput+1)+k*(noHidden+1)+j ) = 2*(diff(k))*dg(sum_l)*brzHidden(j) + + /* + * The following is for verification only + if( noInput==noInpCheck && noOutput==noOutCheck ) + { + brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) = brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) + eps + val est2 = computeValues( data, Vectors.fromBreeze( brzWeights_tmp ) )._2.toBreeze + val diff2 = est2 - brzOut + val d = ( diff2.dot(diff2) - E ) / eps + println( "Calc/Est Wjk: "+ ( gradient( noHidden*(noInput+1)+k*(noHidden+1)+j), d ) ) + brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) = brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) - eps + } + */ + + } + + } + + /* Vij */ + for( i <- 0 to noInput ) { + + for( j <- 0 to noHidden-1 ) { /* the hidden robonode has no associated Vij */ + + for( k<- 0 to noOutput-1 ) { + + val brzW = brzWeights( noHidden*(noInput+1) to noHidden*(noInput+1)+(noHidden+1)-1 ).toVector + val sum_n1 = brzHidden.dot( brzW ) + val brzV = brzWeights( j*(noInput+1) to j*(noInput+1)+(noInput+1)-1 ).toVector + val sum_n2 = brzV.dot( brzInp ) + gradient( i+j*(noInput+1) ) = + gradient( i+j*(noInput+1) ) + 2*(diff(k))*dg( sum_n1 )*brzWeights( noHidden*(noInput+1)+k*(noHidden+1)+j )*dg( sum_n2 )*brzInp( i ) + } + + /* + * The following is for verification only + if( noInput==noInpCheck && noOutput==noOutCheck ) + { + brzWeights_tmp( i+j*(noInput+1) ) = brzWeights_tmp( i+j*(noInput+1) ) + eps + val est2 = computeValues( data, Vectors.fromBreeze( brzWeights_tmp ) )._2.toBreeze + val diff2 = est2 - brzOut + val d = ( diff2.dot( diff2 ) - E ) / eps + println( "Calc/Est Vij: "+ ( gradient( i+j*(noInput+1) ), d ) ) + brzWeights_tmp( i+j*(noInput+1) ) = brzWeights_tmp( i+j*(noInput+1) ) - eps + } + */ + } + } + (Vectors.fromBreeze(gradient), E) + + } + + override def compute( + data: Vector, + label: Double, + weights: Vector, + cumGradient: Vector): Double = { + + val (grad, err) = compute( data, label, weights ) + + cumGradient.toBreeze += grad.toBreeze + + return err + + } +} + +class ANNUpdater extends Updater { + + override def compute( + weightsOld: Vector, + gradient: Vector, + stepSize: Double, + iter: Int, + regParam: Double): (Vector, Double) = { + + val thisIterStepSize = stepSize + + val brzWeights: 
BV[Double] = weightsOld.toBreeze.toDenseVector + + brzAxpy(-thisIterStepSize, gradient.toBreeze, brzWeights) + + (Vectors.fromBreeze(brzWeights), 0) + } + +} + +class ParallelANN ( + + private var stepSize: Double, + private var numIterations: Int, + private var miniBatchFraction: Double, + private var noInput: Int, + private var noHidden: Int, + private var noOutput: Int, + private val beta: Double + + ) extends GeneralizedSteepestDescendAlgorithm[ParallelANNModel] with Serializable { + + private val gradient = new LeastSquaresGradientANN( noInput, noHidden, noOutput, beta ) + private val updater = new SimpleUpdater() + override val optimizer = new GradientDescent(gradient, updater) + .setStepSize(stepSize) + .setNumIterations(numIterations) + .setMiniBatchFraction(miniBatchFraction) + + def this() = { + this( 0.001, 100, 1.0, 1, 5, 1, 1.0 ) + } + + override protected def createModel(weights: Vector) = { + new ParallelANNModel(weights, noInput, noHidden, noOutput, beta) + } + +} From 96ba82a55ef214397e7a8ef06ddecac65414f3f8 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 3 Jul 2014 11:34:41 +0800 Subject: [PATCH 070/143] Create GeneralizedSteepestDescendAlgorithm This is the general steepest descend model, with as inputs Vectors and outputs Vectors or Doubles. --- .../ann/GeneralizedSteepestDescendAlgorithm | 165 ++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm b/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm new file mode 100644 index 0000000000000..bc5d9e0e31ec4 --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.ann + +import breeze.linalg.{DenseVector => BDV, SparseVector => BSV} +import org.apache.spark.annotation.DeveloperApi +import org.apache.spark.Logging +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.optimization._ +import org.apache.spark.mllib.linalg.{Vectors, Vector} +import breeze.linalg.DenseVector +import breeze.linalg.{DenseVector => BDV} +import breeze.linalg.{SparseVector => BSV} + +/** + * :: DeveloperApi :: + * GeneralizedLinearModel (GLM) represents a model trained using + * GeneralizedLinearAlgorithm. GLMs consist of a weight vector and + * an intercept. + * + * @param weights Weights computed for every feature. + * @param intercept Intercept computed for this model. 
+ */ +@DeveloperApi +abstract class GeneralizedSteepestDescendModel(val weights: Vector ) + extends Serializable { + + /** + * Predict the result given a data point and the weights learned. + * + * @param dataMatrix Row vector containing the features for this data point + * @param weightMatrix Column vector containing the weights of the model + * + * If the prediction model consists of a multi-dimensional vector, predictPoint + * returns only the first element of each vector. To get the whole vector, + * use predictPointV instead. + */ + protected def predictPoint( dataMatrix: Vector, weightMatrix: Vector ): Double + + /** + * Predict the result given a data point and the weights learned. + * + * @param dataMatrix Row vector containing the features for this data point + * @param weightMatrix Column vector containing the weights of the model + * + * Returns the complete output vector. + */ + protected def predictPointV( dataMatrix: Vector, weightsMatrix: Vector ): Vector + + /** + * Predict values for the given data set using the model trained. + * + * @param testData RDD representing data points to be predicted + * @return RDD[Double] where each entry contains the corresponding prediction + * + * Returns only first element of output vector. + */ + def predict( testData: RDD[Vector] ): RDD[Double] = { + + val localWeights = weights + testData.map(v => predictPoint(v, localWeights ) ) + + } + + /** + * Predict values for the given data set using the model trained. + * + * @param testData RDD representing data points to be predicted + * @return RDD[Vector] where each entry contains the corresponding prediction + * + * Returns the complete output vector. + */ + def predictV( testData: RDD[Vector] ): RDD[Vector] = { + + val localWeights = weights + testData.map( v => predictPointV( v, localWeights ) ) + + } + + /** + * Predict values for a single data point using the model trained. + * + * @param testData array representing a single data point + * @return Double prediction from the trained model + * + * Returns only first element of output vector. + */ + def predict( testData: Vector ): Double = { + + predictPoint( testData, weights ) + + } + + /** + * Predict values for a single data point using the model trained. + * + * @param testData array representing a single data point + * @return Double prediction from the trained model + * + * Returns the complete vector. + */ + def predictV( testData: Vector ): Vector = { + + predictPointV( testData, weights ) + + } + +} + +/** + * :: DeveloperApi :: + * GeneralizedSteepestDescend implements methods to train a function using + * the Steepest Descend algorithm. + * This class should be extended with an Optimizer to create a new GLM. + */ +@DeveloperApi +abstract class GeneralizedSteepestDescendAlgorithm[M <: GeneralizedSteepestDescendModel] + extends Logging with Serializable { + + /** The optimizer to solve the problem. */ + def optimizer: Optimizer + + /** + * Create a model given the weights + */ + protected def createModel(weights: Vector): M + + /** Prepends one to the input vector. 
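+ * For example, the vector [x1, x2] becomes [1.0, x1, x2].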
*/ + private def prependOne(vector: Vector): Vector = { + val vector1 = vector.toBreeze match { + case dv: BDV[Double] => BDV.vertcat(BDV.ones[Double](1), dv) + case sv: BSV[Double] => BSV.vertcat(new BSV[Double](Array(0), Array(1.0), 1), sv) + case v: Any => throw new IllegalArgumentException("Do not support vector type " + v.getClass) + } + Vectors.fromBreeze(vector1) + } + + /** + * Run the algorithm with the configured parameters on an input RDD + * of LabeledPoint entries starting from the initial weights provided. + */ + def run(input: RDD[(Vector,Vector)], initialWeights: Vector): M = { + + val data = input.map( v => ( (0.0).toDouble, Vectors.fromBreeze( DenseVector.vertcat( v._1.toBreeze.toDenseVector, v._2.toBreeze.toDenseVector ) ) ) ) + val weights = optimizer.optimize(data, initialWeights) + + createModel( weights ) + + } +} From 576ef79579976cf39ad3fa04e895bf636fbb4e32 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 3 Jul 2014 11:36:31 +0800 Subject: [PATCH 071/143] Create TestParallelANN.scala This is a test program for parallel ANNs. --- .../spark/mllib/ann/TestParallelANN.scala | 259 ++++++++++++++++++ 1 file changed, 259 insertions(+) create mode 100644 mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala new file mode 100644 index 0000000000000..813f138cab096 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala @@ -0,0 +1,259 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package org.apache.spark.mllib.ann + +import org.apache.spark._ +import org.apache.spark.mllib.regression._ +import org.apache.spark.mllib.linalg._ +import org.apache.spark.mllib.ann._ +import scala.util.Random + +object TestParallelANN { + + var rand = new Random + + def generateInput2D( f: Double => Double, xmin: Double, xmax: Double, noPoints: Int ): Array[(Vector,Vector)] = { + + var out = new Array[(Vector,Vector)](noPoints) + + for( i <- 0 to noPoints-1 ) { + val x = xmin+rand.nextDouble()*(xmax-xmin) + val y = f(x) + out(i) = ( Vectors.dense( x ), Vectors.dense( y ) ) + } + + return out + + } + + + def generateInput3D( f: (Double,Double) => Double, xmin: Double, xmax: Double, ymin: Double, ymax: Double, noPoints: Int ): Array[(Vector,Vector)] = { + + var out = new Array[(Vector,Vector)](noPoints) + + for( i <- 0 to noPoints-1 ) { + val x = xmin+rand.nextDouble()*(xmax-xmin) + val y = ymin+rand.nextDouble()*(ymax-ymin) + val z = f( x, y ) + var arr = new Array[Double](2) + arr(0) = x + arr(1) = y + out(i) = ( Vectors.dense( arr ), Vectors.dense( z ) ) + } + + out + + } + + def generateInput4D( f: Double => (Double,Double,Double), tmin: Double, tmax: Double, noPoints: Int ): Array[(Vector,Vector)] = { + + var out = new Array[(Vector,Vector)](noPoints) + + for( i <- 0 to noPoints-1 ) { + + val t: Double = tmin+rand.nextDouble()*(tmax-tmin) + + var arr = new Array[Double](3) + + var F = f(t) + + arr(0) = F._1 + arr(1) = F._2 + arr(2) = F._3 + + out(i) = ( Vectors.dense( t ), Vectors.dense( arr ) ) + } + + out + + } + + def f( T: Double ): Double = { + val y = 0.5+Math.abs(T/5).toInt.toDouble*.15+math.sin(T*math.Pi/10)*.1 + assert( y<= 1) + y + } + + def f3D( x: Double, y: Double ): Double = { + .5+.24*Math.sin( x*2*math.Pi/10 ) + .24*Math.cos( y*2*math.Pi/10 ) + } + + def f4D( t: Double ): (Double, Double,Double) = { + val x = Math.abs(.8*Math.cos( t*2*math.Pi/20 ) )+.1 + val y = (11+t)/22 + val z = .5+.35*Math.sin(t*2*math.Pi/5)*Math.cos( t*2*math.Pi/10 ) + .15*t/11 + ( x, y, z ) + } + + def concat( v1: Vector, v2: Vector ): Vector = { + + var a1 = v1.toArray + var a2 = v2.toArray + var a3 = new Array[Double]( a1.size + a2.size ) + + for( i <- 0 to a1.size-1 ) { + a3(i) = a1(i) + } + + for( i<-0 to a2.size-1 ) { + a3(i+a1.size) = a2(i) + } + + Vectors.dense( a3 ) + + } + + def main( arg: Array[String] ) { + + println( "Parallel ANN tester" ) + + var curAngle: Double = 0.0 + var graphic: Boolean = false + + if( (arg.length>0) && (arg(0)=="graph" ) ) { + graphic = true + } + + var outputFrame2D: OutputFrame2D = null + var outputFrame3D: OutputFrame3D = null + var outputFrame4D: OutputFrame3D = null + + if( graphic ) { + + outputFrame2D = new OutputFrame2D( "x -> y" ) + outputFrame2D.apply + + outputFrame3D = new OutputFrame3D( "(x,y) -> z", 1 ) + outputFrame3D.apply + + outputFrame4D = new OutputFrame3D( "t -> (x,y,z)" ) + outputFrame4D.apply + + } + + var A = 20.0 + var B = 50.0 + + var conf = new SparkConf().setAppName("Parallel ANN").setMaster("local[5]") + var sc = new SparkContext(conf) + + val testRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache() + val testRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache + val testRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache + + if( graphic ) { + outputFrame2D.setData( testRDD2D.map( T => concat( T._1, T._2 ) ) ) + outputFrame3D.setData( testRDD3D.map( T => concat( T._1, T._2 ) ) ) + outputFrame4D.setData( testRDD4D.map( T => T._2 
) ) + } + + val parallelANN2D = new ParallelANNWithSGD( 1, 10 ) + parallelANN2D.optimizer.setNumIterations(1000).setStepSize( 1.0 ) + + val parallelANN3D = new ParallelANNWithSGD( 2, 20 ) + parallelANN3D.optimizer.setNumIterations(1000).setStepSize( 1.0 ) + + val parallelANN4D = new ParallelANNWithSGD( 1, 20, 3 ) + parallelANN4D.optimizer.setNumIterations( 1000 ).setStepSize( 1.0 ) + + var model2D = parallelANN2D.train( testRDD2D ) + var model3D = parallelANN3D.train( testRDD3D ) + var model4D = parallelANN4D.train( testRDD4D ) + + val noIt = 100 + var errHist = new Array[(Int,Double,Double,Double)]( noIt ) + + val validationRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache() + val validationRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache + val validationRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache + + for( i <- 0 to noIt-1 ) { + + val predictedAndTarget2D = validationRDD2D.map( T => ( T._1, T._2, model2D.predictV( T._1 ) ) ) + val predictedAndTarget3D = validationRDD3D.map( T => ( T._1, T._2, model3D.predictV( T._1 ) ) ) + val predictedAndTarget4D = validationRDD4D.map( T => ( T._1, T._2, model4D.predictV( T._1 ) ) ) + + var err2D = predictedAndTarget2D.map( T => + (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) + ).reduce( (u,v) => u+v ) + + var err3D = predictedAndTarget3D.map( T => + (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) + ).reduce( (u,v) => u+v ) + + var err4D = predictedAndTarget4D.map( T => { + + val v1 = T._2.toArray + val v2 = T._3.toArray + + (v1(0) - v2(0))*(v1(0) - v2(0))+ + (v1(1) - v2(1))*(v1(1) - v2(1))+ + (v1(2) - v2(2))*(v1(2) - v2(2)) + + } ).reduce( (u,v) => u+v ) + + + if( graphic ) { + + val predicted2D = predictedAndTarget2D.map( + T => concat( T._1, T._3 ) + ) + + val predicted3D = predictedAndTarget3D.map( + T => concat( T._1, T._3 ) + ) + + val predicted4D = predictedAndTarget4D.map( + T => T._3 + ) + + curAngle = curAngle + math.Pi/4 + if( curAngle>=2*math.Pi ) { + curAngle = curAngle-2*math.Pi + } + + outputFrame3D.setAngle( curAngle ) + outputFrame4D.setAngle( curAngle ) + + outputFrame2D.setApproxPoints( predicted2D ) + outputFrame3D.setApproxPoints( predicted3D ) + outputFrame4D.setApproxPoints( predicted4D ) + + } + + println( "Error 2D/3D/4D: "+(err2D, err3D, err4D) ) + errHist(i) = ( i, err2D, err3D, err4D ) + + if( i Date: Thu, 3 Jul 2014 11:42:29 +0800 Subject: [PATCH 072/143] Create TestParallelANNgraphics.scala Visualisation tools; only used when "TestParallelANN" is given the "graph" parameter. --- .../mllib/ann/TestParallelANNgraphics.scala | 310 ++++++++++++++++++ 1 file changed, 310 insertions(+) create mode 100644 mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala new file mode 100644 index 0000000000000..1d3ac8e4b2486 --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala @@ -0,0 +1,310 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.ann + +import java.awt._ +import java.awt.event._ +import org.apache.spark.rdd.RDD +import org.apache.spark.mllib.linalg.Vector +import scala.Array.canBuildFrom + +object windowAdapter extends WindowAdapter { + + override def windowClosing( e: WindowEvent ) { + System.exit(0) + } + +} + +class OutputCanvas2D( wd: Int, ht: Int ) extends Canvas { + + var points: Array[Vector] = null + var approxPoints: Array[Vector] = null + + /* input: rdd of (x,y) vectors */ + def setData( rdd: RDD[Vector] ) { + points = rdd.toArray + repaint + } + + def setApproxPoints( rdd: RDD[Vector] ) { + approxPoints = rdd.toArray + repaint + } + + def plotDot( g: Graphics, x: Int, y: Int ) { + val r = 5 + val noSamp = 6*r + var x1 = x + var y1 = y+r + for( j<-1 to noSamp ) { + val x2 = (x.toDouble+math.sin( j.toDouble*2*math.Pi/noSamp )*r+.5).toInt + val y2 = (y.toDouble+math.cos( j.toDouble*2*math.Pi/noSamp )*r+.5).toInt + g.drawLine( x1, ht-y1, x2, ht-y2 ) + x1 = x2 + y1 = y2 + } + } + + override def paint( g: Graphics) = { + + var xmax: Double = 0.0 + var xmin: Double = 0.0 + var ymax: Double = 0.0 + var ymin: Double = 0.0 + + if( points!=null ) { + + g.setColor( Color.black ) + val x = points.map( T => (T.toArray)(0) ) + val y = points.map( T => (T.toArray)(1) ) + + xmax = x.max + xmin = x.min + ymax = y.max + ymin = y.min + + for( i <- 0 to x.size-1 ) { + + val xr = (((x(i).toDouble-xmin)/(xmax-xmin))*wd+.5).toInt + val yr = (((y(i).toDouble-ymin)/(ymax-ymin))*ht+.5).toInt + plotDot( g, xr, yr ) + + } + + if( approxPoints != null ) { + + g.setColor( Color.red ) + val x = approxPoints.map( T => (T.toArray)(0) ) + val y = approxPoints.map( T => (T.toArray)(1) ) + + for( i <- 0 to x.size-1 ) { + val xr = (((x(i).toDouble-xmin)/(xmax-xmin))*wd+.5).toInt + val yr = (((y(i).toDouble-ymin)/(ymax-ymin))*ht+.5).toInt + plotDot( g, xr, yr ) + } + + } + + } + + } +} + +class OutputFrame2D( title: String ) extends Frame( title ) { + + val wd = 800 + val ht = 600 + + var outputCanvas = new OutputCanvas2D( wd, ht ) + + def apply() { + addWindowListener( windowAdapter ) + setSize( wd, ht ) + add( "Center", outputCanvas ) + show() + } + + def setData( rdd: RDD[Vector] ) { + outputCanvas.setData( rdd ) + } + + def setApproxPoints( rdd: RDD[Vector] ) { + outputCanvas.setApproxPoints( rdd ) + } + + +} + +object windowAdapter3D extends WindowAdapter { + + override def windowClosing( e: WindowEvent ) { + System.exit(0) + } + +} + +class OutputCanvas3D( wd: Int, ht: Int, shadowFrac: Double ) extends Canvas { + + var angle: Double = 0 + var points: Array[Vector] = null + var approxPoints: Array[Vector] = null + + /* 3 dimensional (x,y,z) vector */ + def setData( rdd: RDD[Vector] ) { + points = rdd.toArray + repaint + } + + def setApproxPoints( rdd: RDD[Vector] ) { + approxPoints = rdd.toArray + repaint + } + + def plotDot( g: Graphics, x: Int, y: Int ) { + val r = 5 + val noSamp = 6*r + var x1 = x + var y1 = y+r + for( 
j<-1 to noSamp ) { + val x2 = (x.toDouble+math.sin( j.toDouble*2*math.Pi/noSamp )*r+.5).toInt + val y2 = (y.toDouble+math.cos( j.toDouble*2*math.Pi/noSamp )*r+.5).toInt + g.drawLine( x1, ht-y1, x2, ht-y2 ) + x1 = x2 + y1 = y2 + } + } + + def plotLine( g: Graphics, x1: Int, y1: Int, x2: Int, y2: Int ) { + g.drawLine( x1, ht-y1, x2, ht-y2 ) + } + + def calcCord( arr: Array[Double], angle: Double ): (Double, Double, Double, Double, Double, Double) = { + + var arrOut = new Array[Double](6) + + val x = arr(0)*math.cos( angle ) - arr(1)*math.sin( angle ) + val y = arr(0)*math.sin( angle ) + arr(1)*math.cos( angle ) + val z = arr(2) + + val x0 = arr(0)*math.cos( angle ) - arr(1)*math.sin( angle ) + val y0 = arr(0)*math.sin( angle ) + arr(1)*math.cos( angle ) + val z0 = 0 + + val xs = (arr(0)+shadowFrac*arr(2))*math.cos( angle ) - arr(1)*math.sin( angle ) + val ys = (arr(0)+shadowFrac*arr(2))*math.sin( angle ) + arr(1)*math.cos( angle ) + val zs = 0 + + arrOut(0) = y-.5*x + arrOut(1) = z-.25*x + + arrOut(2) = y0-.5*x0 + arrOut(3) = z0-.25*x0 + + arrOut(4) = ys-.5*xs + arrOut(5) = zs-.25*xs + + ( arrOut(0), arrOut(1), arrOut(2), arrOut(3), arrOut(4), arrOut(5) ) + + } + + override def paint( g: Graphics) = { + + if( points!=null ) { + + var p = points.map( T => calcCord( T.toArray, angle ) ).toArray + + var xmax = p(0)._1 + var xmin = p(0)._1 + var ymax = p(0)._2 + var ymin = p(0)._2 + + for( i <-0 to p.size-1 ) { + + if( xmaxp(i)._1 ) xmin = p(i)._1 + if( xmin>p(i)._3 ) xmin = p(i)._3 + if( xmin>p(i)._5 ) xmin = p(i)._5 + + if( ymaxp(i)._2 ) ymin = p(i)._2 + if( ymin>p(i)._4 ) ymin = p(i)._4 + if( ymin>p(i)._6 ) ymin = p(i)._6 + + } + + for( i <- 0 to p.size-1 ) { + + var x_ = (((p(i)._1-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt + var y_ = (((p(i)._2-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt + var x0 = (((p(i)._3-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt + var y0 = (((p(i)._4-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt + var xs = (((p(i)._5-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt + var ys = (((p(i)._6-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt + + g.setColor( Color.black ) + + plotDot( g, x_, y_ ) + plotLine( g, x_, y_, x0, y0 ) + g.setColor( Color.gray ) + plotLine( g, x0, y0, xs, ys ) + + } + + if( approxPoints != null ) { + + var p = approxPoints.map( T => calcCord( T.toArray, angle ) ) + + for( i <- 0 to p.size-1 ) { + + var x_ = (((p(i)._1-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt + var y_ = (((p(i)._2-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt + var x0 = (((p(i)._3-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt + var y0 = (((p(i)._4-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt + var xs = (((p(i)._5-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt + var ys = (((p(i)._6-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt + + g.setColor( Color.red ) + plotDot( g, x_, y_ ) + plotLine( g, x_, y_, x0, y0 ) + g.setColor( Color.magenta ) + plotLine( g, x0, y0, xs, ys ) + + } + + } + + } + } +} + +class OutputFrame3D( title: String, shadowFrac: Double ) extends Frame( title ) { + + val wd = 800 + val ht = 600 + + def this( title: String ) = this( title, .25 ) + + var outputCanvas = new OutputCanvas3D( wd, ht, shadowFrac ) + + def apply() { + addWindowListener( windowAdapter3D ) + setSize( wd, ht ) + add( "Center", outputCanvas ) + show() + } + + def setData( rdd: RDD[Vector] ) { + outputCanvas.setData( rdd ) + } + + def setAngle( angle: Double ) { + outputCanvas.angle = angle + } + + def setApproxPoints( rdd: RDD[Vector] ) { + outputCanvas.setApproxPoints( rdd ) + } + +} From 1af7f2522da4d8330dccfd80e8a530723f9020a3 Mon Sep 17 00:00:00 2001 From: Bert 
Greevenbosch Date: Thu, 3 Jul 2014 17:15:29 +0800 Subject: [PATCH 073/143] Update TestParallelANN.scala Due to TAB characters, some of the indent was messed up. This fixes it. --- .../spark/mllib/ann/TestParallelANN.scala | 138 +++++++++--------- 1 file changed, 69 insertions(+), 69 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala index 813f138cab096..3b56da115701e 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala @@ -30,57 +30,57 @@ object TestParallelANN { def generateInput2D( f: Double => Double, xmin: Double, xmax: Double, noPoints: Int ): Array[(Vector,Vector)] = { - var out = new Array[(Vector,Vector)](noPoints) + var out = new Array[(Vector,Vector)](noPoints) - for( i <- 0 to noPoints-1 ) { - val x = xmin+rand.nextDouble()*(xmax-xmin) - val y = f(x) - out(i) = ( Vectors.dense( x ), Vectors.dense( y ) ) - } + for( i <- 0 to noPoints-1 ) { + val x = xmin+rand.nextDouble()*(xmax-xmin) + val y = f(x) + out(i) = ( Vectors.dense( x ), Vectors.dense( y ) ) + } - return out + return out } def generateInput3D( f: (Double,Double) => Double, xmin: Double, xmax: Double, ymin: Double, ymax: Double, noPoints: Int ): Array[(Vector,Vector)] = { - var out = new Array[(Vector,Vector)](noPoints) + var out = new Array[(Vector,Vector)](noPoints) - for( i <- 0 to noPoints-1 ) { - val x = xmin+rand.nextDouble()*(xmax-xmin) - val y = ymin+rand.nextDouble()*(ymax-ymin) - val z = f( x, y ) - var arr = new Array[Double](2) - arr(0) = x - arr(1) = y - out(i) = ( Vectors.dense( arr ), Vectors.dense( z ) ) - } + for( i <- 0 to noPoints-1 ) { + val x = xmin+rand.nextDouble()*(xmax-xmin) + val y = ymin+rand.nextDouble()*(ymax-ymin) + val z = f( x, y ) + var arr = new Array[Double](2) + arr(0) = x + arr(1) = y + out(i) = ( Vectors.dense( arr ), Vectors.dense( z ) ) + } - out + out } def generateInput4D( f: Double => (Double,Double,Double), tmin: Double, tmax: Double, noPoints: Int ): Array[(Vector,Vector)] = { - var out = new Array[(Vector,Vector)](noPoints) + var out = new Array[(Vector,Vector)](noPoints) - for( i <- 0 to noPoints-1 ) { + for( i <- 0 to noPoints-1 ) { - val t: Double = tmin+rand.nextDouble()*(tmax-tmin) + val t: Double = tmin+rand.nextDouble()*(tmax-tmin) - var arr = new Array[Double](3) + var arr = new Array[Double](3) - var F = f(t) + var F = f(t) - arr(0) = F._1 - arr(1) = F._2 - arr(2) = F._3 + arr(0) = F._1 + arr(1) = F._2 + arr(2) = F._3 - out(i) = ( Vectors.dense( t ), Vectors.dense( arr ) ) - } + out(i) = ( Vectors.dense( t ), Vectors.dense( arr ) ) + } - out + out } @@ -186,65 +186,65 @@ object TestParallelANN { for( i <- 0 to noIt-1 ) { val predictedAndTarget2D = validationRDD2D.map( T => ( T._1, T._2, model2D.predictV( T._1 ) ) ) - val predictedAndTarget3D = validationRDD3D.map( T => ( T._1, T._2, model3D.predictV( T._1 ) ) ) - val predictedAndTarget4D = validationRDD4D.map( T => ( T._1, T._2, model4D.predictV( T._1 ) ) ) + val predictedAndTarget3D = validationRDD3D.map( T => ( T._1, T._2, model3D.predictV( T._1 ) ) ) + val predictedAndTarget4D = validationRDD4D.map( T => ( T._1, T._2, model4D.predictV( T._1 ) ) ) - var err2D = predictedAndTarget2D.map( T => - (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) - ).reduce( (u,v) => u+v ) + var err2D = predictedAndTarget2D.map( T => + (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - 
T._2.toArray(0)) + ).reduce( (u,v) => u+v ) - var err3D = predictedAndTarget3D.map( T => - (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) - ).reduce( (u,v) => u+v ) + var err3D = predictedAndTarget3D.map( T => + (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) + ).reduce( (u,v) => u+v ) - var err4D = predictedAndTarget4D.map( T => { + var err4D = predictedAndTarget4D.map( T => { - val v1 = T._2.toArray - val v2 = T._3.toArray + val v1 = T._2.toArray + val v2 = T._3.toArray - (v1(0) - v2(0))*(v1(0) - v2(0))+ - (v1(1) - v2(1))*(v1(1) - v2(1))+ - (v1(2) - v2(2))*(v1(2) - v2(2)) + (v1(0) - v2(0))*(v1(0) - v2(0))+ + (v1(1) - v2(1))*(v1(1) - v2(1))+ + (v1(2) - v2(2))*(v1(2) - v2(2)) - } ).reduce( (u,v) => u+v ) + } ).reduce( (u,v) => u+v ) - if( graphic ) { + if( graphic ) { val predicted2D = predictedAndTarget2D.map( - T => concat( T._1, T._3 ) - ) + T => concat( T._1, T._3 ) + ) - val predicted3D = predictedAndTarget3D.map( - T => concat( T._1, T._3 ) - ) + val predicted3D = predictedAndTarget3D.map( + T => concat( T._1, T._3 ) + ) - val predicted4D = predictedAndTarget4D.map( - T => T._3 - ) + val predicted4D = predictedAndTarget4D.map( + T => T._3 + ) curAngle = curAngle + math.Pi/4 - if( curAngle>=2*math.Pi ) { - curAngle = curAngle-2*math.Pi - } + if( curAngle>=2*math.Pi ) { + curAngle = curAngle-2*math.Pi + } - outputFrame3D.setAngle( curAngle ) - outputFrame4D.setAngle( curAngle ) + outputFrame3D.setAngle( curAngle ) + outputFrame4D.setAngle( curAngle ) - outputFrame2D.setApproxPoints( predicted2D ) - outputFrame3D.setApproxPoints( predicted3D ) - outputFrame4D.setApproxPoints( predicted4D ) + outputFrame2D.setApproxPoints( predicted2D ) + outputFrame3D.setApproxPoints( predicted3D ) + outputFrame4D.setApproxPoints( predicted4D ) - } + } - println( "Error 2D/3D/4D: "+(err2D, err3D, err4D) ) - errHist(i) = ( i, err2D, err3D, err4D ) + println( "Error 2D/3D/4D: "+(err2D, err3D, err4D) ) + errHist(i) = ( i, err2D, err3D, err4D ) - if( i Date: Thu, 3 Jul 2014 17:16:59 +0800 Subject: [PATCH 074/143] Update TestParallelANN.scala Fixed some issues with indent. --- .../org/apache/spark/mllib/ann/TestParallelANN.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala index 3b56da115701e..f821f87ebd937 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala @@ -136,13 +136,13 @@ object TestParallelANN { if( graphic ) { - outputFrame2D = new OutputFrame2D( "x -> y" ) - outputFrame2D.apply + outputFrame2D = new OutputFrame2D( "x -> y" ) + outputFrame2D.apply - outputFrame3D = new OutputFrame3D( "(x,y) -> z", 1 ) - outputFrame3D.apply + outputFrame3D = new OutputFrame3D( "(x,y) -> z", 1 ) + outputFrame3D.apply - outputFrame4D = new OutputFrame3D( "t -> (x,y,z)" ) + outputFrame4D = new OutputFrame3D( "t -> (x,y,z)" ) outputFrame4D.apply } From b01fc3ca392f8c98895c4035d9689ae03072d467 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 3 Jul 2014 17:21:40 +0800 Subject: [PATCH 075/143] Update TestParallelANNgraphics.scala Due to TAB characters, the indent was mixed up. This fixes it. 
--- .../mllib/ann/TestParallelANNgraphics.scala | 156 +++++++++--------- 1 file changed, 78 insertions(+), 78 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala index 1d3ac8e4b2486..4a7a9a712b549 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala @@ -63,45 +63,45 @@ class OutputCanvas2D( wd: Int, ht: Int ) extends Canvas { override def paint( g: Graphics) = { - var xmax: Double = 0.0 - var xmin: Double = 0.0 - var ymax: Double = 0.0 - var ymin: Double = 0.0 + var xmax: Double = 0.0 + var xmin: Double = 0.0 + var ymax: Double = 0.0 + var ymin: Double = 0.0 - if( points!=null ) { + if( points!=null ) { - g.setColor( Color.black ) - val x = points.map( T => (T.toArray)(0) ) - val y = points.map( T => (T.toArray)(1) ) + g.setColor( Color.black ) + val x = points.map( T => (T.toArray)(0) ) + val y = points.map( T => (T.toArray)(1) ) - xmax = x.max - xmin = x.min - ymax = y.max - ymin = y.min + xmax = x.max + xmin = x.min + ymax = y.max + ymin = y.min - for( i <- 0 to x.size-1 ) { + for( i <- 0 to x.size-1 ) { - val xr = (((x(i).toDouble-xmin)/(xmax-xmin))*wd+.5).toInt - val yr = (((y(i).toDouble-ymin)/(ymax-ymin))*ht+.5).toInt - plotDot( g, xr, yr ) + val xr = (((x(i).toDouble-xmin)/(xmax-xmin))*wd+.5).toInt + val yr = (((y(i).toDouble-ymin)/(ymax-ymin))*ht+.5).toInt + plotDot( g, xr, yr ) - } + } if( approxPoints != null ) { - g.setColor( Color.red ) - val x = approxPoints.map( T => (T.toArray)(0) ) - val y = approxPoints.map( T => (T.toArray)(1) ) + g.setColor( Color.red ) + val x = approxPoints.map( T => (T.toArray)(0) ) + val y = approxPoints.map( T => (T.toArray)(1) ) - for( i <- 0 to x.size-1 ) { - val xr = (((x(i).toDouble-xmin)/(xmax-xmin))*wd+.5).toInt - val yr = (((y(i).toDouble-ymin)/(ymax-ymin))*ht+.5).toInt - plotDot( g, xr, yr ) - } + for( i <- 0 to x.size-1 ) { + val xr = (((x(i).toDouble-xmin)/(xmax-xmin))*wd+.5).toInt + val yr = (((y(i).toDouble-ymin)/(ymax-ymin))*ht+.5).toInt + plotDot( g, xr, yr ) + } - } + } - } + } } } @@ -205,77 +205,77 @@ class OutputCanvas3D( wd: Int, ht: Int, shadowFrac: Double ) extends Canvas { override def paint( g: Graphics) = { - if( points!=null ) { + if( points!=null ) { - var p = points.map( T => calcCord( T.toArray, angle ) ).toArray + var p = points.map( T => calcCord( T.toArray, angle ) ).toArray - var xmax = p(0)._1 - var xmin = p(0)._1 - var ymax = p(0)._2 - var ymin = p(0)._2 + var xmax = p(0)._1 + var xmin = p(0)._1 + var ymax = p(0)._2 + var ymin = p(0)._2 - for( i <-0 to p.size-1 ) { + for( i <-0 to p.size-1 ) { - if( xmaxp(i)._1 ) xmin = p(i)._1 - if( xmin>p(i)._3 ) xmin = p(i)._3 - if( xmin>p(i)._5 ) xmin = p(i)._5 + if( xmin>p(i)._1 ) xmin = p(i)._1 + if( xmin>p(i)._3 ) xmin = p(i)._3 + if( xmin>p(i)._5 ) xmin = p(i)._5 - if( ymaxp(i)._2 ) ymin = p(i)._2 - if( ymin>p(i)._4 ) ymin = p(i)._4 - if( ymin>p(i)._6 ) ymin = p(i)._6 + if( ymin>p(i)._2 ) ymin = p(i)._2 + if( ymin>p(i)._4 ) ymin = p(i)._4 + if( ymin>p(i)._6 ) ymin = p(i)._6 - } + } - for( i <- 0 to p.size-1 ) { + for( i <- 0 to p.size-1 ) { - var x_ = (((p(i)._1-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt - var y_ = (((p(i)._2-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt - var x0 = (((p(i)._3-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt - var y0 = (((p(i)._4-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt - var xs = 
(((p(i)._5-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt - var ys = (((p(i)._6-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt + var x_ = (((p(i)._1-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt + var y_ = (((p(i)._2-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt + var x0 = (((p(i)._3-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt + var y0 = (((p(i)._4-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt + var xs = (((p(i)._5-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt + var ys = (((p(i)._6-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt - g.setColor( Color.black ) + g.setColor( Color.black ) - plotDot( g, x_, y_ ) - plotLine( g, x_, y_, x0, y0 ) - g.setColor( Color.gray ) - plotLine( g, x0, y0, xs, ys ) - - } + plotDot( g, x_, y_ ) + plotLine( g, x_, y_, x0, y0 ) + g.setColor( Color.gray ) + plotLine( g, x0, y0, xs, ys ) + + } - if( approxPoints != null ) { + if( approxPoints != null ) { - var p = approxPoints.map( T => calcCord( T.toArray, angle ) ) + var p = approxPoints.map( T => calcCord( T.toArray, angle ) ) - for( i <- 0 to p.size-1 ) { + for( i <- 0 to p.size-1 ) { - var x_ = (((p(i)._1-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt - var y_ = (((p(i)._2-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt - var x0 = (((p(i)._3-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt - var y0 = (((p(i)._4-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt - var xs = (((p(i)._5-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt - var ys = (((p(i)._6-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt + var x_ = (((p(i)._1-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt + var y_ = (((p(i)._2-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt + var x0 = (((p(i)._3-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt + var y0 = (((p(i)._4-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt + var xs = (((p(i)._5-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt + var ys = (((p(i)._6-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt - g.setColor( Color.red ) - plotDot( g, x_, y_ ) - plotLine( g, x_, y_, x0, y0 ) - g.setColor( Color.magenta ) - plotLine( g, x0, y0, xs, ys ) + g.setColor( Color.red ) + plotDot( g, x_, y_ ) + plotLine( g, x_, y_, x0, y0 ) + g.setColor( Color.magenta ) + plotLine( g, x0, y0, xs, ys ) - } + } - } + } - } + } } } From cae6dc2522394cc01db052f10bee5b35355ae079 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Wed, 30 Jul 2014 11:06:52 +0800 Subject: [PATCH 076/143] Update GeneralizedSteepestDescendAlgorithm Updated to agree with "sbt/sbt scalastyle" --- .../ann/GeneralizedSteepestDescendAlgorithm | 59 ++++++++++--------- 1 file changed, 32 insertions(+), 27 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm b/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm index bc5d9e0e31ec4..7c436d1067a12 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm @@ -45,21 +45,21 @@ abstract class GeneralizedSteepestDescendModel(val weights: Vector ) * * @param dataMatrix Row vector containing the features for this data point * @param weightMatrix Column vector containing the weights of the model - * + * * If the prediction model consists of a multi-dimensional vector, predictPoint - * returns only the first element of each vector. To get the whole vector, + * returns only the first element of each vector. To get the whole vector, * use predictPointV instead. */ protected def predictPoint( dataMatrix: Vector, weightMatrix: Vector ): Double - + /** * Predict the result given a data point and the weights learned. 
* * @param dataMatrix Row vector containing the features for this data point * @param weightMatrix Column vector containing the weights of the model - * + * * Returns the complete output vector. - */ + */ protected def predictPointV( dataMatrix: Vector, weightsMatrix: Vector ): Vector /** @@ -67,57 +67,57 @@ abstract class GeneralizedSteepestDescendModel(val weights: Vector ) * * @param testData RDD representing data points to be predicted * @return RDD[Double] where each entry contains the corresponding prediction - * + * * Returns only first element of output vector. */ def predict( testData: RDD[Vector] ): RDD[Double] = { - - val localWeights = weights + + val localWeights = weights testData.map(v => predictPoint(v, localWeights ) ) - + } - + /** * Predict values for the given data set using the model trained. * * @param testData RDD representing data points to be predicted * @return RDD[Vector] where each entry contains the corresponding prediction - * + * * Returns the complete output vector. - */ + */ def predictV( testData: RDD[Vector] ): RDD[Vector] = { - + val localWeights = weights testData.map( v => predictPointV( v, localWeights ) ) - + } - + /** * Predict values for a single data point using the model trained. * * @param testData array representing a single data point * @return Double prediction from the trained model - * + * * Returns only first element of output vector. */ def predict( testData: Vector ): Double = { - + predictPoint( testData, weights ) - + } - + /** * Predict values for a single data point using the model trained. * * @param testData array representing a single data point * @return Double prediction from the trained model - * + * * Returns the complete vector. */ def predictV( testData: Vector ): Vector = { - + predictPointV( testData, weights ) - + } } @@ -139,7 +139,7 @@ abstract class GeneralizedSteepestDescendAlgorithm[M <: GeneralizedSteepestDesce * Create a model given the weights */ protected def createModel(weights: Vector): M - + /** Prepends one to the input vector. */ private def prependOne(vector: Vector): Vector = { val vector1 = vector.toBreeze match { @@ -154,12 +154,17 @@ abstract class GeneralizedSteepestDescendAlgorithm[M <: GeneralizedSteepestDesce * Run the algorithm with the configured parameters on an input RDD * of LabeledPoint entries starting from the initial weights provided. 
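 * (Each entry is in fact an (input, output) vector pair; run() concatenates the
 * pair into a single feature vector with a dummy 0.0 label before invoking the
 * optimizer.)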
*/ - def run(input: RDD[(Vector,Vector)], initialWeights: Vector): M = { - - val data = input.map( v => ( (0.0).toDouble, Vectors.fromBreeze( DenseVector.vertcat( v._1.toBreeze.toDenseVector, v._2.toBreeze.toDenseVector ) ) ) ) + def run(input: RDD[(Vector,Vector)], initialWeights: Vector): M = { + + val data = input.map( v => ( + (0.0).toDouble, + Vectors.fromBreeze( DenseVector.vertcat( + v._1.toBreeze.toDenseVector, + v._2.toBreeze.toDenseVector ) ) + ) ) val weights = optimizer.optimize(data, initialWeights) createModel( weights ) - + } } From 9eee6f1be309dd6847721ec48f2edc7fba40372e Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Wed, 30 Jul 2014 11:07:50 +0800 Subject: [PATCH 077/143] Update ParallelANN.scala Updated to agree with "sbt/sbt scalastyle" --- .../apache/spark/mllib/ann/ParallelANN.scala | 303 +++++++++--------- 1 file changed, 151 insertions(+), 152 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala index f0668d77895d2..f43fbb4448fa7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala @@ -29,10 +29,11 @@ import breeze.linalg.{Vector => BV} import breeze.linalg.{axpy => brzAxpy} import org.apache.spark.annotation.DeveloperApi import org.apache.spark.mllib.regression.RegressionModel +import org.apache.spark.util.random.XORShiftRandom /* * Implements a Artificial Neural Network (ANN) - * + * * format of data: * data[ 0..noInput-1 ]: Input * data[ noInput..noInput+noOutput-1 ]: Output @@ -40,89 +41,82 @@ import org.apache.spark.mllib.regression.RegressionModel */ trait ANN { - + def noInput: Integer def noHidden: Integer def noOutput: Integer def beta: Double - - def g( x: Double ) = (1/(1+math.exp(-beta*x))) - def dg( x: Double ) = beta*g(x)*(1-g(x)) + def g( x: Double ) = (1/(1 + math.exp(-beta*x))) + def dg( x: Double ) = beta*g(x)*(1 - g(x)) /* returns the hidden layer including the -1 robonode! 
*/ def computeHidden( data: Vector, weights: Vector ): Vector = { - + val brzData = data.toBreeze - val brzInp = DenseVector.vertcat( brzData( 0 to noInput-1 ).toDenseVector, DenseVector[Double](-1.0) ) + val brzInp = DenseVector.vertcat( + brzData( 0 to noInput - 1 ).toDenseVector, DenseVector[Double](-1.0) ) val brzWeights = weights.toBreeze - var hidden = DenseVector.zeros[Double]( noHidden+1 ) - + var hidden = DenseVector.zeros[Double]( noHidden + 1 ) + for( j <- 0 to noHidden-1 ) { - - val weightsSubset = brzWeights( j*(noInput+1) to j*(noInput+1)+(noInput+1)-1 ).toVector + + val weightsSubset = brzWeights( + j*(noInput + 1) to j*(noInput + 1) + (noInput + 1) - 1 ).toVector hidden( j ) = g( weightsSubset.dot( brzInp ) ) - + } - + hidden( noHidden ) = -1.0 - + Vectors.fromBreeze( hidden ) - + } - + /* returns the hidden layer including the -1 robonode, as well as the final estimation */ def computeValues( data: Vector, weights: Vector ): (Vector, Vector) = { - + var hidden = computeHidden( data, weights ) var output = new Array[Double](noOutput) - - for( k<-0 to noOutput-1 ) { + + for( k <- 0 to noOutput - 1 ) { val brzWeights = weights.toBreeze - var weightsSubset = brzWeights( noHidden*(noInput+1)+k*(noHidden+1) to noHidden*(noInput+1)+(k+1)*(noHidden+1)-1).toVector + var weightsSubset = brzWeights( noHidden*(noInput + 1) + k*(noHidden + 1) to + noHidden*(noInput + 1) + (k + 1)*(noHidden + 1) - 1).toVector output(k) = g( weightsSubset.dot( hidden.toBreeze ) ) } - + ( hidden, Vectors.dense( output ) ) - - } - + + } + } -class ParallelANNModel private[mllib] +class ParallelANNModel private[mllib] ( override val weights: Vector, val noInp: Integer, val noHid: Integer, val noOut: Integer, val b: Double ) - extends GeneralizedSteepestDescendModel(weights) with RegressionModel with Serializable with ANN { - + extends GeneralizedSteepestDescendModel(weights) with RegressionModel with Serializable with ANN { + val noInput = noInp val noHidden = noHid val noOutput = noOut val beta = b - + override def predictPoint( data: Vector, weights: Vector ): Double = { val outp = computeValues( data, weights )._2 outp.toArray(0) } - + def predictPointV( data: Vector, weights: Vector): Vector = { computeValues( data, weights )._2 - } - + } + } -/** - * Train a linear regression model with no regularization using Stochastic Gradient Descent. - * This solves the least squares regression formulation - * f(weights) = 1/n ||A weights-y||^2 - * (which is the mean squared error). - * Here the data matrix has n rows, and the input RDD holds the set of rows of A, each with - * its corresponding right hand side label y. - * See also the documentation for the precise formulation. 
- */ class ParallelANNWithSGD private ( private var stepSize: Double, private var numIterations: Int, @@ -130,9 +124,11 @@ class ParallelANNWithSGD private ( private var noInput: Int, private var noHidden: Int, private var noOutput: Int, - private val beta: Double ) + private val beta: Double ) extends GeneralizedSteepestDescendAlgorithm[ParallelANNModel] with Serializable { - + + private val rand = new XORShiftRandom + private val gradient = new LeastSquaresGradientANN( noInput, noHidden, noOutput, beta ) private val updater = new ANNUpdater() override val optimizer = new GradientDescent(gradient, updater) @@ -140,72 +136,62 @@ class ParallelANNWithSGD private ( .setNumIterations(numIterations) .setMiniBatchFraction(miniBatchFraction) - /** - * Construct a LinearRegression object with default parameters: {stepSize: 1.0, - * numIterations: 100, miniBatchFraction: 1.0}. - */ def this() = { this( 1.0, 100, 1.0, 1, 5, 1, 1.0 ) } - + def this( noHidden: Int ) = { this( 1.0, 100, 1.0, 1, noHidden, 1, 1.0 ) } - + def this( noInput: Int, noHidden: Int ) = { this( 1.0, 100, 1.0, noInput, noHidden, 1, 1.0 ) } - + def this( noInput: Int, noHidden: Int, noOutput: Int ) = { this( 1.0, 100, 1.0, noInput, noHidden, noOutput, 1.0 ) } override protected def createModel(weights: Vector) = { - new ParallelANNModel( weights, noInput, noHidden, noOutput, beta ) + new ParallelANNModel( weights, noInput, noHidden, noOutput, beta ) } - + def checkOutput( rdd: RDD[(Vector,Vector)] ) { val oVals = rdd.flatMap( T => T._2.toArray ) - var omax = oVals.max + var omax = oVals.max assert( omax <= 1 ) var omin = oVals.min - assert( omin >= 0 ) + assert( omin >= 0 ) + } + + def randomDouble( i: Int ): Double = { + rand.nextDouble() } - - def randomDouble( i: Int ): Double = { - (((i+5)*59049+(i+5)*78125)%65536).toDouble/65536 - } def train( rdd: RDD[(Vector,Vector)] ): ParallelANNModel = { - + val ft = rdd.first() - + assert( noInput == ft._1.size ) assert( noOutput == ft._2.size ) - - checkOutput( rdd ) - - val noWeights = (noInput+1)*noHidden + (noHidden+1)*noOutput + + checkOutput( rdd ) + + val noWeights = (noInput + 1)*noHidden + (noHidden + 1)*noOutput val initialWeightsArr = new Array[Double](noWeights) - - for( i <- 0 to (noInput+1)*noHidden-1 ) - initialWeightsArr( i ) = (randomDouble(i)*4.8-2.4)/(noInput+1) - for( i <- 0 to (noHidden+1)*noOutput-1 ) - initialWeightsArr( (noInput+1)*noHidden+i ) = (randomDouble(i)*4.8-2.4)/(noHidden+1) - + + for( i <- 0 to (noInput + 1)*noHidden - 1 ) + initialWeightsArr( i ) = (randomDouble(i)*4.8 - 2.4)/(noInput + 1) + for( i <- 0 to (noHidden + 1)*noOutput - 1 ) + initialWeightsArr( (noInput + 1)*noHidden + i ) = (randomDouble(i)*4.8 - 2.4)/(noHidden + 1) + val initialWeights = Vectors.dense( initialWeightsArr ) - - println( "Parameters:" ) - println( " noInput: "+noInput ) - println( " noHidden: "+noHidden ) - println( " noOutput: "+noOutput ) - println( " noWeights: "+noWeights ) - + run( rdd, initialWeights ) } - + def train( rdd: RDD[(Vector,Vector)], model: ParallelANNModel ): ParallelANNModel = { run( rdd, model.weights ) } @@ -214,114 +200,127 @@ class ParallelANNWithSGD private ( /** * data consists of input vector and output vector, and has the following form: - * - * [ ---input--- ---output---] - * - * where input = data( 0 to noInput-1 ) and output = data( noInput to noInput+noOutput-1 ) - * + * + * [ ---input--- ---output--- ] + * + * where input = data( 0 to noInput-1 ) and output = data( noInput to noInput+noOutput-1 ) + * * V_ij is the weight from input node i 
to hidden node j * W_jk is the weight from hidden node j to output node k - * + * * The weights have the following mapping: - * + * * V_ij goes to position i + j*(noInput+1) * W_jk goes to position (noInput+1)*noHidden + j + k*(noHidden+1) - * + * * Gradient has same mapping, i.e. * dE/dVij goes to i + j*(noInput+1) * dE/dWjk goes to (noInput+1)*noHidden + j +k*(noHidden+1) - * - * Where E = ((estOutput-output),(estOutput-output)), + * + * Where E = ((estOutput-output),(estOutput-output)), * the inner product of the difference between estimation and target output with itself. */ -class LeastSquaresGradientANN( noInp: Integer, noHid: Integer, noOut: Integer, b: Double ) extends Gradient with ANN { - +class LeastSquaresGradientANN( + noInp: Integer, + noHid: Integer, + noOut: Integer, + b: Double ) + extends Gradient with ANN { + val noInput = noInp val noHidden = noHid val noOutput = noOut - val beta = b - + val beta = b + override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { - - val brzData = data.toBreeze - val brzInp = DenseVector.vertcat( brzData( 0 to noInput-1 ).toDenseVector, DenseVector[Double](-1.0) ) - - val brzOut = brzData( noInput.toInt to noInput+noOutput-1 ).toVector - val brzWeights = weights.toBreeze - val gradient = DenseVector.zeros[Double]( (noInput+1)*noHidden+(noHidden+1)*noOutput ) - - + + val brzData = data.toBreeze + val brzInp = DenseVector.vertcat( brzData( 0 to noInput - 1 ).toDenseVector, + DenseVector[Double](-1.0) ) + + val brzOut = brzData( noInput.toInt to noInput + noOutput - 1 ).toVector + val brzWeights = weights.toBreeze + val gradient = DenseVector.zeros[Double]( (noInput + 1)*noHidden + (noHidden + 1)*noOutput ) + + val (hidden, output) = computeValues( data, weights ) var brzHidden = hidden.toBreeze /* already includes the robonode */ val brzEst = output.toBreeze val diff = brzEst :- brzOut - val E = diff.dot(diff) - - /* + val E = diff.dot(diff) + + /* * The following three fields are for verification only val eps = .000001 val noInpCheck = 0 val noOutCheck = 0 */ - + var brzWeights_tmp = weights.toBreeze - + /* Wjk */ - for( j <-0 to noHidden ) { - - for( k <-0 to noOutput-1 ) { - - val brzW = brzWeights( noHidden*(noInput+1)+k*(noHidden+1) to noHidden*(noInput+1)+(k+1)*(noHidden+1)-1 ).toVector + for( j <- 0 to noHidden ) { + + for( k <- 0 to noOutput - 1 ) { + + val brzW = brzWeights( noHidden*(noInput + 1) + k*(noHidden + 1) to + noHidden*(noInput + 1) + (k + 1)*(noHidden + 1) - 1 ).toVector var sum_l = brzHidden.dot( brzW ) - gradient( noHidden*(noInput+1)+k*(noHidden+1)+j ) = 2*(diff(k))*dg(sum_l)*brzHidden(j) + gradient( noHidden*(noInput + 1) + k*(noHidden + 1) + j ) + = 2*(diff(k))*dg(sum_l)*brzHidden(j) - /* - * The following is for verification only + /* + * The following is for verification only if( noInput==noInpCheck && noOutput==noOutCheck ) { - brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) = brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) + eps - val est2 = computeValues( data, Vectors.fromBreeze( brzWeights_tmp ) )._2.toBreeze - val diff2 = est2 - brzOut - val d = ( diff2.dot(diff2) - E ) / eps - println( "Calc/Est Wjk: "+ ( gradient( noHidden*(noInput+1)+k*(noHidden+1)+j), d ) ) - brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) = brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) - eps - } + brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) + = brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) + eps + val est2 = computeValues( data, Vectors.fromBreeze( 
brzWeights_tmp ) )._2.toBreeze + val diff2 = est2 - brzOut + val d = ( diff2.dot(diff2) - E ) / eps + println( "Calc/Est Wjk: "+ ( gradient( noHidden*(noInput+1)+k*(noHidden+1)+j), d ) ) + brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) + = brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) - eps + } */ } - + } - - /* Vij */ + + /* Vij */ for( i <- 0 to noInput ) { - - for( j <- 0 to noHidden-1 ) { /* the hidden robonode has no associated Vij */ - - for( k<- 0 to noOutput-1 ) { - - val brzW = brzWeights( noHidden*(noInput+1) to noHidden*(noInput+1)+(noHidden+1)-1 ).toVector + + for( j <- 0 to noHidden - 1 ) { /* the hidden robonode has no associated Vij */ + + for( k<- 0 to noOutput - 1 ) { + + val brzW = brzWeights( noHidden*(noInput + 1) to + noHidden*(noInput + 1) + (noHidden + 1) - 1 ).toVector val sum_n1 = brzHidden.dot( brzW ) - val brzV = brzWeights( j*(noInput+1) to j*(noInput+1)+(noInput+1)-1 ).toVector + val brzV = brzWeights( j*(noInput + 1) to j*(noInput + 1) + (noInput + 1) - 1 ).toVector val sum_n2 = brzV.dot( brzInp ) - gradient( i+j*(noInput+1) ) = - gradient( i+j*(noInput+1) ) + 2*(diff(k))*dg( sum_n1 )*brzWeights( noHidden*(noInput+1)+k*(noHidden+1)+j )*dg( sum_n2 )*brzInp( i ) + gradient( i + j*(noInput + 1) ) = + gradient( i + j*(noInput + 1) ) + + 2*(diff(k))*dg( sum_n1 )*brzWeights( noHidden*(noInput + 1) + + k*(noHidden + 1) + j )*dg( sum_n2 )*brzInp( i ) } - - /* - * The following is for verification only + + /* + * The following is for verification only if( noInput==noInpCheck && noOutput==noOutCheck ) { brzWeights_tmp( i+j*(noInput+1) ) = brzWeights_tmp( i+j*(noInput+1) ) + eps val est2 = computeValues( data, Vectors.fromBreeze( brzWeights_tmp ) )._2.toBreeze val diff2 = est2 - brzOut val d = ( diff2.dot( diff2 ) - E ) / eps - println( "Calc/Est Vij: "+ ( gradient( i+j*(noInput+1) ), d ) ) + println( "Calc/Est Vij: "+ ( gradient( i+j*(noInput+1) ), d ) ) brzWeights_tmp( i+j*(noInput+1) ) = brzWeights_tmp( i+j*(noInput+1) ) - eps } */ } - } + } (Vectors.fromBreeze(gradient), E) } @@ -331,38 +330,38 @@ class LeastSquaresGradientANN( noInp: Integer, noHid: Integer, noOut: Integer, b label: Double, weights: Vector, cumGradient: Vector): Double = { - + val (grad, err) = compute( data, label, weights ) - + cumGradient.toBreeze += grad.toBreeze return err - + } } class ANNUpdater extends Updater { - + override def compute( weightsOld: Vector, gradient: Vector, stepSize: Double, iter: Int, regParam: Double): (Vector, Double) = { - - val thisIterStepSize = stepSize - + + val thisIterStepSize = stepSize + val brzWeights: BV[Double] = weightsOld.toBreeze.toDenseVector - + brzAxpy(-thisIterStepSize, gradient.toBreeze, brzWeights) - + (Vectors.fromBreeze(brzWeights), 0) } - + } class ParallelANN ( - + private var stepSize: Double, private var numIterations: Int, private var miniBatchFraction: Double, @@ -370,7 +369,7 @@ class ParallelANN ( private var noHidden: Int, private var noOutput: Int, private val beta: Double - + ) extends GeneralizedSteepestDescendAlgorithm[ParallelANNModel] with Serializable { private val gradient = new LeastSquaresGradientANN( noInput, noHidden, noOutput, beta ) @@ -383,9 +382,9 @@ class ParallelANN ( def this() = { this( 0.001, 100, 1.0, 1, 5, 1, 1.0 ) } - + override protected def createModel(weights: Vector) = { new ParallelANNModel(weights, noInput, noHidden, noOutput, beta) - } - + } + } From fec869127dc1b2df0f0d9eb5c126806d092f881e Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Wed, 30 Jul 2014 17:01:38 +0800 Subject: 
[PATCH 078/143] Update GeneralizedSteepestDescendAlgorithm --- .../spark/mllib/ann/GeneralizedSteepestDescendAlgorithm | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm b/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm index 7c436d1067a12..77b73ec5de78d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm @@ -29,12 +29,10 @@ import breeze.linalg.{SparseVector => BSV} /** * :: DeveloperApi :: - * GeneralizedLinearModel (GLM) represents a model trained using - * GeneralizedLinearAlgorithm. GLMs consist of a weight vector and - * an intercept. + * GeneralizedSteepestDescendModel represents a model trained using + * GeneralizedSteepestDescendAlgorithm. * * @param weights Weights computed for every feature. - * @param intercept Intercept computed for this model. */ @DeveloperApi abstract class GeneralizedSteepestDescendModel(val weights: Vector ) @@ -110,7 +108,7 @@ abstract class GeneralizedSteepestDescendModel(val weights: Vector ) * Predict values for a single data point using the model trained. * * @param testData array representing a single data point - * @return Double prediction from the trained model + * @return Vector prediction from the trained model * * Returns the complete vector. */ From 060ae3a4133c30b94798d47408c5cc95c0e66002 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Wed, 30 Jul 2014 17:03:27 +0800 Subject: [PATCH 079/143] Update TestParallelANN.scala Cleaned up the source layout. --- .../spark/mllib/ann/TestParallelANN.scala | 246 +++++++++--------- 1 file changed, 125 insertions(+), 121 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala index f821f87ebd937..77a52a2d9e2fe 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala @@ -25,207 +25,211 @@ import org.apache.spark.mllib.ann._ import scala.util.Random object TestParallelANN { - - var rand = new Random - + + var rand = new Random + def generateInput2D( f: Double => Double, xmin: Double, xmax: Double, noPoints: Int ): Array[(Vector,Vector)] = { - + var out = new Array[(Vector,Vector)](noPoints) - - for( i <- 0 to noPoints-1 ) { - val x = xmin+rand.nextDouble()*(xmax-xmin) - val y = f(x) - out(i) = ( Vectors.dense( x ), Vectors.dense( y ) ) - } - + + for( i <- 0 to noPoints - 1 ) { + val x = xmin + rand.nextDouble()*(xmax - xmin) + val y = f(x) + out(i) = ( Vectors.dense( x ), Vectors.dense( y ) ) + } + return out - + } - + def generateInput3D( f: (Double,Double) => Double, xmin: Double, xmax: Double, ymin: Double, ymax: Double, noPoints: Int ): Array[(Vector,Vector)] = { - + var out = new Array[(Vector,Vector)](noPoints) - - for( i <- 0 to noPoints-1 ) { - val x = xmin+rand.nextDouble()*(xmax-xmin) - val y = ymin+rand.nextDouble()*(ymax-ymin) + + for( i <- 0 to noPoints - 1 ) { + + val x = xmin + rand.nextDouble()*(xmax - xmin) + val y = ymin + rand.nextDouble()*(ymax - ymin) val z = f( x, y ) + var arr = new Array[Double](2) + arr(0) = x arr(1) = y out(i) = ( Vectors.dense( arr ), Vectors.dense( z ) ) - } - + + } + out - - } + + } def generateInput4D( f: Double => (Double,Double,Double), tmin: Double, tmax: Double, 
noPoints: Int ): Array[(Vector,Vector)] = { - + var out = new Array[(Vector,Vector)](noPoints) - - for( i <- 0 to noPoints-1 ) { - - val t: Double = tmin+rand.nextDouble()*(tmax-tmin) - + + for( i <- 0 to noPoints - 1 ) { + + val t: Double = tmin + rand.nextDouble()*(tmax - tmin) var arr = new Array[Double](3) - var F = f(t) - + arr(0) = F._1 arr(1) = F._2 arr(2) = F._3 - + out(i) = ( Vectors.dense( t ), Vectors.dense( arr ) ) - } - + } + out - + } - + def f( T: Double ): Double = { - val y = 0.5+Math.abs(T/5).toInt.toDouble*.15+math.sin(T*math.Pi/10)*.1 + val y = 0.5 + Math.abs(T/5).toInt.toDouble*.15 + math.sin(T*math.Pi/10)*.1 assert( y<= 1) y } - - def f3D( x: Double, y: Double ): Double = { - .5+.24*Math.sin( x*2*math.Pi/10 ) + .24*Math.cos( y*2*math.Pi/10 ) + + def f3D( x: Double, y: Double ): Double = { + .5 + .24*Math.sin( x*2*math.Pi/10 ) + .24*Math.cos( y*2*math.Pi/10 ) } - - def f4D( t: Double ): (Double, Double,Double) = { - val x = Math.abs(.8*Math.cos( t*2*math.Pi/20 ) )+.1 - val y = (11+t)/22 - val z = .5+.35*Math.sin(t*2*math.Pi/5)*Math.cos( t*2*math.Pi/10 ) + .15*t/11 + + def f4D( t: Double ): (Double, Double,Double) = { + val x = Math.abs(.8*Math.cos( t*2*math.Pi/20 ) ) + .1 + val y = (11 + t)/22 + val z = .5 + .35*Math.sin(t*2*math.Pi/5)*Math.cos( t*2*math.Pi/10 ) + .15*t/11 ( x, y, z ) } - + def concat( v1: Vector, v2: Vector ): Vector = { - + var a1 = v1.toArray var a2 = v2.toArray var a3 = new Array[Double]( a1.size + a2.size ) - - for( i <- 0 to a1.size-1 ) { + + for( i <- 0 to a1.size - 1 ) { a3(i) = a1(i) } - - for( i<-0 to a2.size-1 ) { - a3(i+a1.size) = a2(i) + + for( i <- 0 to a2.size - 1 ) { + a3(i + a1.size) = a2(i) } - + Vectors.dense( a3 ) - - } + + } def main( arg: Array[String] ) { - + println( "Parallel ANN tester" ) - + var curAngle: Double = 0.0 var graphic: Boolean = false - + if( (arg.length>0) && (arg(0)=="graph" ) ) { graphic = true } - + var outputFrame2D: OutputFrame2D = null var outputFrame3D: OutputFrame3D = null var outputFrame4D: OutputFrame3D = null - + if( graphic ) { - + outputFrame2D = new OutputFrame2D( "x -> y" ) outputFrame2D.apply - + outputFrame3D = new OutputFrame3D( "(x,y) -> z", 1 ) - outputFrame3D.apply - + outputFrame3D.apply + outputFrame4D = new OutputFrame3D( "t -> (x,y,z)" ) outputFrame4D.apply - + } - + var A = 20.0 var B = 50.0 - + var conf = new SparkConf().setAppName("Parallel ANN").setMaster("local[5]") var sc = new SparkContext(conf) - - val testRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache() - val testRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache + + val testRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache() + val testRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache val testRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache - + if( graphic ) { + outputFrame2D.setData( testRDD2D.map( T => concat( T._1, T._2 ) ) ) - outputFrame3D.setData( testRDD3D.map( T => concat( T._1, T._2 ) ) ) + outputFrame3D.setData( testRDD3D.map( T => concat( T._1, T._2 ) ) ) outputFrame4D.setData( testRDD4D.map( T => T._2 ) ) + } val parallelANN2D = new ParallelANNWithSGD( 1, 10 ) - parallelANN2D.optimizer.setNumIterations(1000).setStepSize( 1.0 ) - + parallelANN2D.optimizer.setNumIterations(1000).setStepSize( 1.0 ) + val parallelANN3D = new ParallelANNWithSGD( 2, 20 ) - parallelANN3D.optimizer.setNumIterations(1000).setStepSize( 1.0 ) - + 
parallelANN3D.optimizer.setNumIterations(1000).setStepSize( 1.0 ) + val parallelANN4D = new ParallelANNWithSGD( 1, 20, 3 ) - parallelANN4D.optimizer.setNumIterations( 1000 ).setStepSize( 1.0 ) - + parallelANN4D.optimizer.setNumIterations( 1000 ).setStepSize( 1.0 ) + var model2D = parallelANN2D.train( testRDD2D ) - var model3D = parallelANN3D.train( testRDD3D ) + var model3D = parallelANN3D.train( testRDD3D ) var model4D = parallelANN4D.train( testRDD4D ) - - val noIt = 100 + + val noIt = 200 var errHist = new Array[(Int,Double,Double,Double)]( noIt ) - + val validationRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache() val validationRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache val validationRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache - for( i <- 0 to noIt-1 ) { - + for( i <- 0 to noIt - 1 ) { + val predictedAndTarget2D = validationRDD2D.map( T => ( T._1, T._2, model2D.predictV( T._1 ) ) ) - val predictedAndTarget3D = validationRDD3D.map( T => ( T._1, T._2, model3D.predictV( T._1 ) ) ) + val predictedAndTarget3D = validationRDD3D.map( T => ( T._1, T._2, model3D.predictV( T._1 ) ) ) val predictedAndTarget4D = validationRDD4D.map( T => ( T._1, T._2, model4D.predictV( T._1 ) ) ) - - var err2D = predictedAndTarget2D.map( T => - (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) - ).reduce( (u,v) => u+v ) - - var err3D = predictedAndTarget3D.map( T => - (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) - ).reduce( (u,v) => u+v ) + + var err2D = predictedAndTarget2D.map( T => + (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) + ).reduce( (u,v) => u + v ) + + var err3D = predictedAndTarget3D.map( T => + (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) + ).reduce( (u,v) => u + v ) var err4D = predictedAndTarget4D.map( T => { - + val v1 = T._2.toArray val v2 = T._3.toArray - - (v1(0) - v2(0))*(v1(0) - v2(0))+ - (v1(1) - v2(1))*(v1(1) - v2(1))+ + + (v1(0) - v2(0))*(v1(0) - v2(0)) + + (v1(1) - v2(1))*(v1(1) - v2(1)) + (v1(2) - v2(2))*(v1(2) - v2(2)) - - } ).reduce( (u,v) => u+v ) - - + + } ).reduce( (u,v) => u + v ) + + if( graphic ) { - val predicted2D = predictedAndTarget2D.map( - T => concat( T._1, T._3 ) + val predicted2D = predictedAndTarget2D.map( + T => concat( T._1, T._3 ) ) - + val predicted3D = predictedAndTarget3D.map( T => concat( T._1, T._3 ) - ) - + ) + val predicted4D = predictedAndTarget4D.map( - T => T._3 + T => T._3 ) - - curAngle = curAngle + math.Pi/4 + + curAngle = curAngle + math.Pi/4 if( curAngle>=2*math.Pi ) { - curAngle = curAngle-2*math.Pi + curAngle = curAngle - 2*math.Pi } outputFrame3D.setAngle( curAngle ) @@ -234,26 +238,26 @@ object TestParallelANN { outputFrame2D.setApproxPoints( predicted2D ) outputFrame3D.setApproxPoints( predicted3D ) outputFrame4D.setApproxPoints( predicted4D ) - + } - - println( "Error 2D/3D/4D: "+(err2D, err3D, err4D) ) + + println( "Error 2D/3D/4D: " + (err2D, err3D, err4D) ) errHist(i) = ( i, err2D, err3D, err4D ) - - if( i Date: Wed, 30 Jul 2014 17:04:03 +0800 Subject: [PATCH 080/143] Update TestParallelANNgraphics.scala --- .../mllib/ann/TestParallelANNgraphics.scala | 294 ++++++++++-------- 1 file changed, 159 insertions(+), 135 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala index 4a7a9a712b549..da9386b1c1fe9 
100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala @@ -24,102 +24,103 @@ import org.apache.spark.mllib.linalg.Vector import scala.Array.canBuildFrom object windowAdapter extends WindowAdapter { - + override def windowClosing( e: WindowEvent ) { System.exit(0) } - + } class OutputCanvas2D( wd: Int, ht: Int ) extends Canvas { - + var points: Array[Vector] = null var approxPoints: Array[Vector] = null - + /* input: rdd of (x,y) vectors */ def setData( rdd: RDD[Vector] ) { - points = rdd.toArray + points = rdd.toArray repaint } - - def setApproxPoints( rdd: RDD[Vector] ) { + + def setApproxPoints( rdd: RDD[Vector] ) { approxPoints = rdd.toArray repaint } - + def plotDot( g: Graphics, x: Int, y: Int ) { - val r = 5 + val r = 5 val noSamp = 6*r var x1 = x - var y1 = y+r - for( j<-1 to noSamp ) { - val x2 = (x.toDouble+math.sin( j.toDouble*2*math.Pi/noSamp )*r+.5).toInt - val y2 = (y.toDouble+math.cos( j.toDouble*2*math.Pi/noSamp )*r+.5).toInt - g.drawLine( x1, ht-y1, x2, ht-y2 ) + var y1 = y + r + for( j <- 1 to noSamp ) { + val x2 = (x.toDouble + math.sin( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt + val y2 = (y.toDouble + math.cos( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt + g.drawLine( x1, ht - y1, x2, ht - y2 ) x1 = x2 y1 = y2 } } - + override def paint( g: Graphics) = { - - var xmax: Double = 0.0 - var xmin: Double = 0.0 - var ymax: Double = 0.0 - var ymin: Double = 0.0 - - if( points!=null ) { - - g.setColor( Color.black ) - val x = points.map( T => (T.toArray)(0) ) - val y = points.map( T => (T.toArray)(1) ) - - xmax = x.max - xmin = x.min - ymax = y.max - ymin = y.min - - for( i <- 0 to x.size-1 ) { - - val xr = (((x(i).toDouble-xmin)/(xmax-xmin))*wd+.5).toInt - val yr = (((y(i).toDouble-ymin)/(ymax-ymin))*ht+.5).toInt - plotDot( g, xr, yr ) - - } - + + var xmax: Double = 0.0 + var xmin: Double = 0.0 + var ymax: Double = 0.0 + var ymin: Double = 0.0 + + if( points!=null ) { + + g.setColor( Color.black ) + val x = points.map( T => (T.toArray)(0) ) + val y = points.map( T => (T.toArray)(1) ) + + xmax = x.max + xmin = x.min + ymax = y.max + ymin = y.min + + for( i <- 0 to x.size - 1 ) { + + val xr = (((x(i).toDouble - xmin)/(xmax - xmin))*wd + .5).toInt + val yr = (((y(i).toDouble - ymin)/(ymax - ymin))*ht + .5).toInt + plotDot( g, xr, yr ) + + } + if( approxPoints != null ) { - + g.setColor( Color.red ) val x = approxPoints.map( T => (T.toArray)(0) ) val y = approxPoints.map( T => (T.toArray)(1) ) - + for( i <- 0 to x.size-1 ) { - val xr = (((x(i).toDouble-xmin)/(xmax-xmin))*wd+.5).toInt - val yr = (((y(i).toDouble-ymin)/(ymax-ymin))*ht+.5).toInt + val xr = (((x(i).toDouble - xmin)/(xmax - xmin))*wd + .5).toInt + val yr = (((y(i).toDouble - ymin)/(ymax - ymin))*ht + .5).toInt plotDot( g, xr, yr ) } - + } } - + } + } class OutputFrame2D( title: String ) extends Frame( title ) { - + val wd = 800 val ht = 600 - + var outputCanvas = new OutputCanvas2D( wd, ht ) - + def apply() { addWindowListener( windowAdapter ) setSize( wd, ht ) add( "Center", outputCanvas ) show() } - + def setData( rdd: RDD[Vector] ) { outputCanvas.setData( rdd ) } @@ -128,177 +129,200 @@ class OutputFrame2D( title: String ) extends Frame( title ) { outputCanvas.setApproxPoints( rdd ) } - + } object windowAdapter3D extends WindowAdapter { - + override def windowClosing( e: WindowEvent ) { System.exit(0) } - + } class OutputCanvas3D( wd: Int, ht: Int, shadowFrac: Double ) extends Canvas { - + var 
angle: Double = 0 var points: Array[Vector] = null var approxPoints: Array[Vector] = null /* 3 dimensional (x,y,z) vector */ def setData( rdd: RDD[Vector] ) { - points = rdd.toArray + points = rdd.toArray repaint } - - def setApproxPoints( rdd: RDD[Vector] ) { + + def setApproxPoints( rdd: RDD[Vector] ) { approxPoints = rdd.toArray repaint } - + def plotDot( g: Graphics, x: Int, y: Int ) { - val r = 5 + val r = 5 val noSamp = 6*r var x1 = x - var y1 = y+r - for( j<-1 to noSamp ) { - val x2 = (x.toDouble+math.sin( j.toDouble*2*math.Pi/noSamp )*r+.5).toInt - val y2 = (y.toDouble+math.cos( j.toDouble*2*math.Pi/noSamp )*r+.5).toInt - g.drawLine( x1, ht-y1, x2, ht-y2 ) + var y1 = y + r + for( j <- 1 to noSamp ) { + val x2 = (x.toDouble + math.sin( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt + val y2 = (y.toDouble + math.cos( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt + g.drawLine( x1, ht - y1, x2, ht - y2 ) x1 = x2 y1 = y2 } } - + def plotLine( g: Graphics, x1: Int, y1: Int, x2: Int, y2: Int ) { - g.drawLine( x1, ht-y1, x2, ht-y2 ) + g.drawLine( x1, ht - y1, x2, ht - y2 ) } - + def calcCord( arr: Array[Double], angle: Double ): (Double, Double, Double, Double, Double, Double) = { var arrOut = new Array[Double](6) - + val x = arr(0)*math.cos( angle ) - arr(1)*math.sin( angle ) val y = arr(0)*math.sin( angle ) + arr(1)*math.cos( angle ) val z = arr(2) - + val x0 = arr(0)*math.cos( angle ) - arr(1)*math.sin( angle ) val y0 = arr(0)*math.sin( angle ) + arr(1)*math.cos( angle ) val z0 = 0 - - val xs = (arr(0)+shadowFrac*arr(2))*math.cos( angle ) - arr(1)*math.sin( angle ) - val ys = (arr(0)+shadowFrac*arr(2))*math.sin( angle ) + arr(1)*math.cos( angle ) + + val xs = (arr(0) + shadowFrac*arr(2))*math.cos( angle ) - arr(1)*math.sin( angle ) + val ys = (arr(0) + shadowFrac*arr(2))*math.sin( angle ) + arr(1)*math.cos( angle ) val zs = 0 - - arrOut(0) = y-.5*x - arrOut(1) = z-.25*x - - arrOut(2) = y0-.5*x0 - arrOut(3) = z0-.25*x0 - - arrOut(4) = ys-.5*xs - arrOut(5) = zs-.25*xs + + arrOut(0) = y - .5*x + arrOut(1) = z - .25*x + + arrOut(2) = y0 - .5*x0 + arrOut(3) = z0 - .25*x0 + + arrOut(4) = ys - .5*xs + arrOut(5) = zs - .25*xs ( arrOut(0), arrOut(1), arrOut(2), arrOut(3), arrOut(4), arrOut(5) ) - + } - - override def paint( g: Graphics) = { - + + override def paint( g: Graphics) = { + if( points!=null ) { - + var p = points.map( T => calcCord( T.toArray, angle ) ).toArray - + var xmax = p(0)._1 var xmin = p(0)._1 var ymax = p(0)._2 var ymin = p(0)._2 - - for( i <-0 to p.size-1 ) { - if( xmaxp(i)._1 ) xmin = p(i)._1 - if( xmin>p(i)._3 ) xmin = p(i)._3 - if( xmin>p(i)._5 ) xmin = p(i)._5 + if( xmaxp(i)._1 ) { + xmin = p(i)._1 + } + if( xmin>p(i)._3 ) { + xmin = p(i)._3 + } + if( xmin>p(i)._5 ) { + xmin = p(i)._5 + } + + if( ymaxp(i)._2 ) { + ymin = p(i)._2 + } + if( ymin>p(i)._4 ) { + ymin = p(i)._4 + } + if( ymin>p(i)._6 ) { + ymin = p(i)._6 + } - if( ymin>p(i)._2 ) ymin = p(i)._2 - if( ymin>p(i)._4 ) ymin = p(i)._4 - if( ymin>p(i)._6 ) ymin = p(i)._6 - } - + for( i <- 0 to p.size-1 ) { - var x_ = (((p(i)._1-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt - var y_ = (((p(i)._2-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt - var x0 = (((p(i)._3-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt - var y0 = (((p(i)._4-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt - var xs = (((p(i)._5-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt - var ys = (((p(i)._6-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt - + var x_ = (((p(i)._1 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt + var y_ = (((p(i)._2 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt + var x0 = 
(((p(i)._3 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt + var y0 = (((p(i)._4 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt + var xs = (((p(i)._5 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt + var ys = (((p(i)._6 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt + g.setColor( Color.black ) - plotDot( g, x_, y_ ) plotLine( g, x_, y_, x0, y0 ) g.setColor( Color.gray ) plotLine( g, x0, y0, xs, ys ) - - } - + + } + if( approxPoints != null ) { - + var p = approxPoints.map( T => calcCord( T.toArray, angle ) ) - + for( i <- 0 to p.size-1 ) { - - var x_ = (((p(i)._1-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt - var y_ = (((p(i)._2-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt - var x0 = (((p(i)._3-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt - var y0 = (((p(i)._4-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt - var xs = (((p(i)._5-xmin)/(xmax-xmin))*(wd-40)+20.5).toInt - var ys = (((p(i)._6-ymin)/(ymax-ymin))*(ht-40)+20.5).toInt - + + var x_ = (((p(i)._1 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt + var y_ = (((p(i)._2 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt + var x0 = (((p(i)._3 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt + var y0 = (((p(i)._4 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt + var xs = (((p(i)._5 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt + var ys = (((p(i)._6 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt + g.setColor( Color.red ) plotDot( g, x_, y_ ) plotLine( g, x_, y_, x0, y0 ) g.setColor( Color.magenta ) plotLine( g, x0, y0, xs, ys ) - - } - - } + + } + + } } } } class OutputFrame3D( title: String, shadowFrac: Double ) extends Frame( title ) { - + val wd = 800 val ht = 600 - + def this( title: String ) = this( title, .25 ) - + var outputCanvas = new OutputCanvas3D( wd, ht, shadowFrac ) - + def apply() { addWindowListener( windowAdapter3D ) setSize( wd, ht ) add( "Center", outputCanvas ) show() } - + def setData( rdd: RDD[Vector] ) { outputCanvas.setData( rdd ) } - + def setAngle( angle: Double ) { outputCanvas.angle = angle } @@ -306,5 +330,5 @@ class OutputFrame3D( title: String, shadowFrac: Double ) extends Frame( title ) def setApproxPoints( rdd: RDD[Vector] ) { outputCanvas.setApproxPoints( rdd ) } - + } From 7c3a5b39d448266c79d966b647d97b8f951e3585 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Fri, 1 Aug 2014 13:44:33 +0800 Subject: [PATCH 081/143] Rename GeneralizedSteepestDescendAlgorithm to GeneralizedSteepestDescendAlgorithm.scala --- ...DescendAlgorithm => GeneralizedSteepestDescendAlgorithm.scala} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename mllib/src/main/scala/org/apache/spark/mllib/ann/{GeneralizedSteepestDescendAlgorithm => GeneralizedSteepestDescendAlgorithm.scala} (100%) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm b/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm.scala similarity index 100% rename from mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm rename to mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm.scala From fef477658d6ee52496b53901c561fb75e6c0ea61 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Fri, 1 Aug 2014 16:04:35 +0800 Subject: [PATCH 082/143] Update TestParallelANNgraphics.scala Replaced deprecated "toArray" by "collect". 
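As a side note on this change: `RDD.toArray` was deprecated in favour of `RDD.collect`, which likewise materialises the distributed data as a local array on the driver. A minimal sketch (editor's illustration, not part of the patch; assumes an existing `SparkContext`):

```scala
import org.apache.spark.SparkContext

def localCopy(sc: SparkContext): Array[Double] = {
  val rdd = sc.parallelize(Seq(1.0, 2.0, 3.0))
  // collect() replaces the deprecated toArray(); both bring the whole
  // RDD back to the driver, so use them only on small data sets.
  rdd.collect()
}
```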
--- .../apache/spark/mllib/ann/TestParallelANNgraphics.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala index da9386b1c1fe9..682bb7cd03ab8 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala @@ -38,12 +38,12 @@ class OutputCanvas2D( wd: Int, ht: Int ) extends Canvas { /* input: rdd of (x,y) vectors */ def setData( rdd: RDD[Vector] ) { - points = rdd.toArray + points = rdd.collect repaint } def setApproxPoints( rdd: RDD[Vector] ) { - approxPoints = rdd.toArray + approxPoints = rdd.collect repaint } @@ -148,12 +148,12 @@ class OutputCanvas3D( wd: Int, ht: Int, shadowFrac: Double ) extends Canvas { /* 3 dimensional (x,y,z) vector */ def setData( rdd: RDD[Vector] ) { - points = rdd.toArray + points = rdd.collect repaint } def setApproxPoints( rdd: RDD[Vector] ) { - approxPoints = rdd.toArray + approxPoints = rdd.collect repaint } From c086751df6366daaf0ec396bf9fee5f6d6b81d94 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 21 Aug 2014 14:34:18 +0800 Subject: [PATCH 083/143] Update and rename GeneralizedSteepestDescendAlgorithm.scala to GeneralizedSteepestDescentAlgorithm.scala Updated naming --- ...m.scala => GeneralizedSteepestDescentAlgorithm.scala} | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) rename mllib/src/main/scala/org/apache/spark/mllib/ann/{GeneralizedSteepestDescendAlgorithm.scala => GeneralizedSteepestDescentAlgorithm.scala} (95%) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescentAlgorithm.scala similarity index 95% rename from mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm.scala rename to mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescentAlgorithm.scala index 77b73ec5de78d..3bc499fa10270 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescendAlgorithm.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescentAlgorithm.scala @@ -35,7 +35,8 @@ import breeze.linalg.{SparseVector => BSV} * @param weights Weights computed for every feature. */ @DeveloperApi -abstract class GeneralizedSteepestDescendModel(val weights: Vector ) +abstract class GeneralizedSteepestDescentModel(val weights: Vector ) + extends Serializable { /** @@ -127,7 +128,7 @@ abstract class GeneralizedSteepestDescendModel(val weights: Vector ) * This class should be extended with an Optimizer to create a new GLM. */ @DeveloperApi -abstract class GeneralizedSteepestDescendAlgorithm[M <: GeneralizedSteepestDescendModel] +abstract class GeneralizedSteepestDescentAlgorithm[M <: GeneralizedSteepestDescentModel] extends Logging with Serializable { /** The optimizer to solve the problem. 
*/ @@ -157,8 +158,8 @@ abstract class GeneralizedSteepestDescendAlgorithm[M <: GeneralizedSteepestDesce val data = input.map( v => ( (0.0).toDouble, Vectors.fromBreeze( DenseVector.vertcat( - v._1.toBreeze.toDenseVector, - v._2.toBreeze.toDenseVector ) ) + v._1.toBreeze.toDenseVector, + v._2.toBreeze.toDenseVector ) ) ) ) val weights = optimizer.optimize(data, initialWeights) From 21d95d05c6f3be4749c6a250d0b2e244526413ee Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 21 Aug 2014 14:35:31 +0800 Subject: [PATCH 084/143] Update ParallelANN.scala Removed usage of Breeze vectors and optimised computation in the loops --- .../apache/spark/mllib/ann/ParallelANN.scala | 248 ++++++++++-------- 1 file changed, 138 insertions(+), 110 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala index f43fbb4448fa7..d1a31f2c598a3 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala @@ -51,55 +51,57 @@ trait ANN { def dg( x: Double ) = beta*g(x)*(1 - g(x)) /* returns the hidden layer including the -1 robonode! */ - def computeHidden( data: Vector, weights: Vector ): Vector = { + def computeHidden( data: Array[Double], weights: Array[Double] ): Array[Double] = { - val brzData = data.toBreeze - val brzInp = DenseVector.vertcat( - brzData( 0 to noInput - 1 ).toDenseVector, DenseVector[Double](-1.0) ) - val brzWeights = weights.toBreeze - var hidden = DenseVector.zeros[Double]( noHidden + 1 ) + var arrHidden = new Array[Double]( noHidden + 1 ) for( j <- 0 to noHidden-1 ) { - val weightsSubset = brzWeights( - j*(noInput + 1) to j*(noInput + 1) + (noInput + 1) - 1 ).toVector - hidden( j ) = g( weightsSubset.dot( brzInp ) ) + val start = j*(noInput + 1) + var v: Double = 0; + for( w <- 0 to noInput-1 ) + v = v + data(w)*weights( start + w ) + v = v - 1.0 * weights( start + noInput ) // robonode + arrHidden( j ) = g( v ) } - hidden( noHidden ) = -1.0 + arrHidden( noHidden ) = -1.0 - Vectors.fromBreeze( hidden ) + arrHidden } /* returns the hidden layer including the -1 robonode, as well as the final estimation */ - def computeValues( data: Vector, weights: Vector ): (Vector, Vector) = { + def computeValues( + data: Array[Double], + weights: Array[Double] ): + (Array[Double], Array[Double]) = { var hidden = computeHidden( data, weights ) var output = new Array[Double](noOutput) for( k <- 0 to noOutput - 1 ) { - val brzWeights = weights.toBreeze - var weightsSubset = brzWeights( noHidden*(noInput + 1) + k*(noHidden + 1) to - noHidden*(noInput + 1) + (k + 1)*(noHidden + 1) - 1).toVector - output(k) = g( weightsSubset.dot( hidden.toBreeze ) ) + var tmp: Double = 0.0; + for( i <- 0 to noHidden ) + tmp = tmp + hidden(i)*weights( noHidden * ( noInput + 1 ) + k * ( noHidden + 1 ) + i ) + output(k) = g( tmp ) + } - ( hidden, Vectors.dense( output ) ) + ( hidden, output ) } } -class ParallelANNModel private[mllib] -( +class ParallelANNModel private[mllib] ( override val weights: Vector, val noInp: Integer, val noHid: Integer, val noOut: Integer, val b: Double ) - extends GeneralizedSteepestDescendModel(weights) with RegressionModel with Serializable with ANN { + extends GeneralizedSteepestDescentModel(weights) with RegressionModel with Serializable with ANN { val noInput = noInp val noHidden = noHid @@ -107,17 +109,17 @@ class ParallelANNModel private[mllib] val beta = b override def predictPoint( data: Vector, 
weights: Vector ): Double = { - val outp = computeValues( data, weights )._2 - outp.toArray(0) + val outp = computeValues( data.toArray, weights.toArray )._2 + outp(0) } def predictPointV( data: Vector, weights: Vector): Vector = { - computeValues( data, weights )._2 + Vectors.dense( computeValues( data.toArray, weights.toArray )._2 ) } } -class ParallelANNWithSGD private ( +class ParallelANN private ( private var stepSize: Double, private var numIterations: Int, private var miniBatchFraction: Double, @@ -125,7 +127,7 @@ class ParallelANNWithSGD private ( private var noHidden: Int, private var noOutput: Int, private val beta: Double ) - extends GeneralizedSteepestDescendAlgorithm[ParallelANNModel] with Serializable { + extends GeneralizedSteepestDescentAlgorithm[ParallelANNModel] with Serializable { private val rand = new XORShiftRandom @@ -196,6 +198,16 @@ class ParallelANNWithSGD private ( run( rdd, model.weights ) } + def train( rdd: RDD[(Vector,Vector)], weights: Vector ): ParallelANNModel = { + + val ft = rdd.first() + assert( noInput == ft._1.size ) + assert( noOutput == ft._2.size ) + assert( weights.size == (noInput + 1) * noHidden + (noHidden + 1) * noOutput ) + run( rdd, weights ); + + } + } /** @@ -222,10 +234,10 @@ class ParallelANNWithSGD private ( */ class LeastSquaresGradientANN( - noInp: Integer, - noHid: Integer, - noOut: Integer, - b: Double ) + noInp: Integer, + noHid: Integer, + noOut: Integer, + b: Double ) extends Gradient with ANN { val noInput = noInp @@ -233,55 +245,70 @@ class LeastSquaresGradientANN( val noOutput = noOut val beta = b + /* For verification only + private val rand = new XORShiftRandom + */ + override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { - val brzData = data.toBreeze - val brzInp = DenseVector.vertcat( brzData( 0 to noInput - 1 ).toDenseVector, - DenseVector[Double](-1.0) ) + val arrData = data.toArray + val arrWeights = weights.toArray - val brzOut = brzData( noInput.toInt to noInput + noOutput - 1 ).toVector - val brzWeights = weights.toBreeze - val gradient = DenseVector.zeros[Double]( (noInput + 1)*noHidden + (noHidden + 1)*noOutput ) + var gradient = new Array[Double]( (noInput + 1) * noHidden + (noHidden + 1) * noOutput ) + val (arrHidden, output) = computeValues( arrData, arrWeights ) + val arrEst = output - val (hidden, output) = computeValues( data, weights ) - var brzHidden = hidden.toBreeze /* already includes the robonode */ - val brzEst = output.toBreeze - val diff = brzEst :- brzOut - val E = diff.dot(diff) + var diff = new Array[Double]( noOutput ) + var E: Double = 0.0 + for( i <-0 to noOutput-1 ) { + diff( i ) = arrEst( i ) - arrData( noInput.toInt + i ); + E = E + diff(i) * diff(i) + } /* - * The following three fields are for verification only + * The following fields are for verification only val eps = .000001 - val noInpCheck = 0 - val noOutCheck = 0 + val testOneVOutOf = 5000; + val testOneWOutOf = 2500; + var arrWeights_tmp = weights.toArray + val warnErr = 5e-7 */ - var brzWeights_tmp = weights.toBreeze - /* Wjk */ - for( j <- 0 to noHidden ) { + for( k <- 0 to noOutput - 1 ) { + + var start = noHidden*(noInput + 1) + k*(noHidden + 1) + var sum_l: Double = 0 + for( w <- 0 to noHidden ) + sum_l = sum_l + arrHidden( w ) * arrWeights( w + start ) + val dg_sum_l = dg( sum_l ) - for( k <- 0 to noOutput - 1 ) { - val brzW = brzWeights( noHidden*(noInput + 1) + k*(noHidden + 1) to - noHidden*(noInput + 1) + (k + 1)*(noHidden + 1) - 1 ).toVector - var sum_l = brzHidden.dot( brzW ) + for( j 
<- 0 to noHidden ) { + gradient( noHidden*(noInput + 1) + k*(noHidden + 1) + j ) - = 2*(diff(k))*dg(sum_l)*brzHidden(j) + = 2*(diff(k))*dg_sum_l*arrHidden(j) /* * The following is for verification only - if( noInput==noInpCheck && noOutput==noOutCheck ) - { - brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) - = brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) + eps - val est2 = computeValues( data, Vectors.fromBreeze( brzWeights_tmp ) )._2.toBreeze - val diff2 = est2 - brzOut - val d = ( diff2.dot(diff2) - E ) / eps - println( "Calc/Est Wjk: "+ ( gradient( noHidden*(noInput+1)+k*(noHidden+1)+j), d ) ) - brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) - = brzWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) - eps + if( rand.nextInt % (testOneWOutOf>>1) == 0 ) { + arrWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) + = arrWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) + eps + val est2 = computeValues( arrData, arrWeights_tmp )._2 + var E2: Double = 0.0; + for( w <- 0 to noOutput-1 ) { + val diff2 = est2(w)-data( noInput+w ) + E2 = E2 + diff2*diff2 + } + val d = ( E2 - E ) / eps + val compErr = math.abs( gradient( noHidden*(noInput+1)+k*(noHidden+1)+j) - d ) + if( compErr > warnErr ) { + println( "!!! Calc/Est Wjk: " + + ( ( gradient( noHidden*(noInput+1)+k*(noHidden+1)+j), d ), compErr ) ) + } + arrWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) + = arrWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) - eps } */ @@ -289,39 +316,69 @@ class LeastSquaresGradientANN( } + var start = noHidden * (noInput + 1) + var sum_n1: Double = 0 + for( w <- 0 to noHidden ) + sum_n1 = sum_n1 + arrHidden( w )*arrWeights( w + start ) + val dg_sum_n1 = dg( sum_n1 ) + + /* Vij */ - for( i <- 0 to noInput ) { + for( j <- 0 to noHidden - 1 ) { /* the hidden robonode has no associated Vij */ + + start = j * ( noInput + 1 ) + var sum_n2: Double = 0 + for( w <- 0 to noInput-1 ) // non-robonodes + sum_n2 = sum_n2 + arrData( w )*arrWeights( w + start) + sum_n2 = sum_n2 - arrWeights( noInput + start) // robonode + val dg_sum_n2 = dg( sum_n2 ) + + for( i <- 0 to noInput ) { - for( j <- 0 to noHidden - 1 ) { /* the hidden robonode has no associated Vij */ for( k<- 0 to noOutput - 1 ) { - val brzW = brzWeights( noHidden*(noInput + 1) to - noHidden*(noInput + 1) + (noHidden + 1) - 1 ).toVector - val sum_n1 = brzHidden.dot( brzW ) - val brzV = brzWeights( j*(noInput + 1) to j*(noInput + 1) + (noInput + 1) - 1 ).toVector - val sum_n2 = brzV.dot( brzInp ) - gradient( i + j*(noInput + 1) ) = - gradient( i + j*(noInput + 1) ) - + 2*(diff(k))*dg( sum_n1 )*brzWeights( noHidden*(noInput + 1) - + k*(noHidden + 1) + j )*dg( sum_n2 )*brzInp( i ) + if( i>1) == 0 ) { + arrWeights_tmp( i+j*(noInput+1) ) = arrWeights_tmp( i+j*(noInput+1) ) + eps + val est2 = computeValues( arrData, arrWeights_tmp )._2 + + var E2: Double = 0.0; + for( w <- 0 to noOutput-1 ) { + val diff2 = est2(w)-data( noInput+w ) + E2 = E2 + diff2*diff2 + } + + val d = ( E2 - E ) / eps + val compErr = math.abs( gradient( i+j*(noInput+1) )-d ) + if( compErr>warnErr ) + println( "!!! 
Calc/Est Vij: "+ ( ( gradient( i+j*(noInput+1) ), d ), compErr ) ) + arrWeights_tmp( i+j*(noInput+1) ) = arrWeights_tmp( i+j*(noInput+1) ) - eps } */ } } - (Vectors.fromBreeze(gradient), E) + + (Vectors.dense(gradient), E) } @@ -359,32 +416,3 @@ class ANNUpdater extends Updater { } } - -class ParallelANN ( - - private var stepSize: Double, - private var numIterations: Int, - private var miniBatchFraction: Double, - private var noInput: Int, - private var noHidden: Int, - private var noOutput: Int, - private val beta: Double - - ) extends GeneralizedSteepestDescendAlgorithm[ParallelANNModel] with Serializable { - - private val gradient = new LeastSquaresGradientANN( noInput, noHidden, noOutput, beta ) - private val updater = new SimpleUpdater() - override val optimizer = new GradientDescent(gradient, updater) - .setStepSize(stepSize) - .setNumIterations(numIterations) - .setMiniBatchFraction(miniBatchFraction) - - def this() = { - this( 0.001, 100, 1.0, 1, 5, 1, 1.0 ) - } - - override protected def createModel(weights: Vector) = { - new ParallelANNModel(weights, noInput, noHidden, noOutput, beta) - } - -} From d4764a4e973cf6e2db047c8fba29a9c56ac6e330 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 21 Aug 2014 14:36:45 +0800 Subject: [PATCH 085/143] Update TestParallelANN.scala Updated with some performance measurements --- .../spark/mllib/ann/TestParallelANN.scala | 31 +++++++++++++------ 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala index 77a52a2d9e2fe..b37c8a493dd2b 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala @@ -23,12 +23,15 @@ import org.apache.spark.mllib.regression._ import org.apache.spark.mllib.linalg._ import org.apache.spark.mllib.ann._ import scala.util.Random +import java.util.Calendar +import java.text.SimpleDateFormat object TestParallelANN { - var rand = new Random + var rand = new Random( 0 ) - def generateInput2D( f: Double => Double, xmin: Double, xmax: Double, noPoints: Int ): Array[(Vector,Vector)] = { + def generateInput2D( f: Double => Double, xmin: Double, xmax: Double, noPoints: Int ): Array[(Vector,Vector)] = + { var out = new Array[(Vector,Vector)](noPoints) @@ -125,6 +128,9 @@ object TestParallelANN { println( "Parallel ANN tester" ) + val formatter = new SimpleDateFormat("hh:mm:ss") + val starttime = Calendar.getInstance().getTime() + var curAngle: Double = 0.0 var graphic: Boolean = false @@ -156,7 +162,7 @@ object TestParallelANN { var sc = new SparkContext(conf) val testRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache() - val testRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache + val testRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 200 ), 2).cache val testRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache if( graphic ) { @@ -167,23 +173,23 @@ object TestParallelANN { } - val parallelANN2D = new ParallelANNWithSGD( 1, 10 ) + val parallelANN2D = new ParallelANN( 1, 10 ) parallelANN2D.optimizer.setNumIterations(1000).setStepSize( 1.0 ) - val parallelANN3D = new ParallelANNWithSGD( 2, 20 ) + val parallelANN3D = new ParallelANN( 2, 20 ) parallelANN3D.optimizer.setNumIterations(1000).setStepSize( 1.0 ) - val parallelANN4D = new ParallelANNWithSGD( 1, 20, 
3 ) + val parallelANN4D = new ParallelANN( 1, 20, 3 ) parallelANN4D.optimizer.setNumIterations( 1000 ).setStepSize( 1.0 ) var model2D = parallelANN2D.train( testRDD2D ) var model3D = parallelANN3D.train( testRDD3D ) var model4D = parallelANN4D.train( testRDD4D ) - val noIt = 200 + val noIt = 20 var errHist = new Array[(Int,Double,Double,Double)]( noIt ) - val validationRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache() + val validationRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache val validationRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache val validationRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache @@ -241,7 +247,10 @@ object TestParallelANN { } - println( "Error 2D/3D/4D: " + (err2D, err3D, err4D) ) + val now = Calendar.getInstance().getTime() + val times = formatter.format( now ); + + println( "It. "+i+" ("+times+"), Error 2D/3D/4D: " + (err2D, err3D, err4D) ) errHist(i) = ( i, err2D, err3D, err4D ) if( i < noIt - 1 ) { @@ -254,10 +263,14 @@ object TestParallelANN { sc.stop + val stoptime = Calendar.getInstance().getTime() + for( i <- 0 to noIt - 1 ) { println( errHist(i) ) } + println( formatter.format( starttime )+"-" + formatter.format( stoptime ) + " "+(stoptime.getTime-starttime.getTime+500)/1000+" seconds" ) + } } From 4623f2512a24cdc604a4972d192fc8e4838a5c4c Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 21 Aug 2014 14:37:43 +0800 Subject: [PATCH 086/143] Update TestParallelANNgraphics.scala Cleaned code --- .../mllib/ann/TestParallelANNgraphics.scala | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala index 682bb7cd03ab8..e206a8b7072a3 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala @@ -63,29 +63,29 @@ class OutputCanvas2D( wd: Int, ht: Int ) extends Canvas { override def paint( g: Graphics) = { - var xmax: Double = 0.0 - var xmin: Double = 0.0 - var ymax: Double = 0.0 - var ymin: Double = 0.0 + var xmax: Double = 0.0 + var xmin: Double = 0.0 + var ymax: Double = 0.0 + var ymin: Double = 0.0 - if( points!=null ) { + if( points!=null ) { - g.setColor( Color.black ) - val x = points.map( T => (T.toArray)(0) ) - val y = points.map( T => (T.toArray)(1) ) + g.setColor( Color.black ) + val x = points.map( T => (T.toArray)(0) ) + val y = points.map( T => (T.toArray)(1) ) - xmax = x.max - xmin = x.min - ymax = y.max - ymin = y.min + xmax = x.max + xmin = x.min + ymax = y.max + ymin = y.min - for( i <- 0 to x.size - 1 ) { + for( i <- 0 to x.size - 1 ) { - val xr = (((x(i).toDouble - xmin)/(xmax - xmin))*wd + .5).toInt - val yr = (((y(i).toDouble - ymin)/(ymax - ymin))*ht + .5).toInt - plotDot( g, xr, yr ) + val xr = (((x(i).toDouble - xmin)/(xmax - xmin))*wd + .5).toInt + val yr = (((y(i).toDouble - ymin)/(ymax - ymin))*ht + .5).toInt + plotDot( g, xr, yr ) - } + } if( approxPoints != null ) { From 10242b73dfbd0389ef1aac5ec9ec57f0b84deae4 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Fri, 22 Aug 2014 15:03:08 +0800 Subject: [PATCH 087/143] Create mllib-ann.md Documentation for Artificial Neural Network (ANN) --- docs/mllib-ann.md | 284 ++++++++++++++++++++-------------------------- 1 file changed, 120 
insertions(+), 164 deletions(-)

diff --git a/docs/mllib-ann.md b/docs/mllib-ann.md
index bd91b1439da09..1b066d2af38ad 100644
--- a/docs/mllib-ann.md
+++ b/docs/mllib-ann.md
@@ -1,223 +1,179 @@
 ---
 layout: global
 title: Artificial Neural Networks - MLlib
 displayTitle: MLlib - Artificial Neural Networks
 ---
-# Introduction
-
-This document describes the MLlib's Artificial Neural Network (ANN) implementation.
-
-The implementation currently consist of the following files:
-
-* 'ArtificialNeuralNetwork.scala': implements the ANN
-* 'ANNSuite': implements automated tests for the ANN and its gradient
-* 'ANNDemo': a demo that approximates three functions and shows a graphical representation of
-the result
-
-# Summary of usage
+* Table of contents
+{:toc}

-The "ArtificialNeuralNetwork" object is used as an interface to the neural network. It is
-called as follows:
+### Introduction

-```
-val annModel = ArtificialNeuralNetwork.train(rdd, hiddenLayersTopology, maxNumIterations)
-```
+This document describes MLlib's Artificial Neural Network (ANN) implementation.

-where
-
-* `rdd` is an RDD of type (Vector,Vector), the first element containing the input vector and
-the second the associated output vector.
-* `hiddenLayersTopology` is an array of integers (Array[Int]), which contains the number of
-nodes per hidden layer, starting with the layer that takes inputs from the input layer, and
-finishing with the layer that outputs to the output layer. The bias nodes are not counted.
-* `maxNumIterations` is an upper bound to the number of iterations to be performed.
-* `ANNmodel` contains the trained ANN parameters, and can be used to calculated the ANNs
-approximation to arbitrary input values.
+The implementation currently consists of the following files:

-The approximations can be calculated as follows:
+* 'ParallelANN.scala': implements the ANN
+* 'GeneralizedSteepestDescentAlgorithm.scala': provides an abstract class and model as a basis for 'ParallelANN'.

-val v_out = annModel.predict(v_in)
+In addition, there is a demo/test available:

-where v_in is either a Vector or an RDD of Vectors, and v_out respectively a Vector or RDD of
-(Vector,Vector) pairs, corresponding to input and output values.
+* 'TestParallelANN.scala': tests parallel ANNs for various functions
+* 'TestParallelANNgraphics.scala': graphical output for 'TestParallelANN.scala'

-Further details and other calling options will be elaborated upon below.
+### Architecture and Notation

-# Architecture and Notation
+The file ParallelANN.scala implements a three-layer ANN with the following architecture:

-The file ArtificialNeuralNetwork.scala implements the ANN.
The following picture shows the -architecture of a 3-layer ANN: -``` +-------+ | | - | N_0,0 | + | X_0 | | | - +-------+ +-------+ - | | - +-------+ | N_0,1 | +-------+ - | | | | | | - | N_1,0 |- +-------+ ->| N_0,2 | - | | \ Wij1 / | | - +-------+ -- +-------+ -- +-------+ - \ | | / Wjk2 - : ->| N_1,1 |- +-------+ - : | | | | - : +-------+ | N_1,2 | - : | | - : : +-------+ - : : - : : : - : : - : : +-------+ - : : | | - : : |N_K-1,2| - : | | - : +-------+ +-------+ - : | | - : |N_J-1,1| - | | - +-------+ +-------+ + +-------+ +-------+ + | | + +-------+ | H_0 | +-------+ + | | | | | | + | X_1 |- +-------+ ->| O_0 | + | | \ Vij / | | + +-------+ - +-------+ - +-------+ + \ | | / Wjk + : ->| H_1 |- +-------+ + : | | | | + : +-------+ | O_1 | + : | | + : : +-------+ + : : + : : : + : : + : : +-------+ + : : | | + : : | O_K-1 | + : | | + : +-------+ +-------+ + : | | + : | H_J-1 | + | | + +-------+ +-------+ | | - |N_I-1,0| + | X_I-1 | | | +-------+ - +-------+ +--------+ - | | | | - | -1 | | -1 | - | | | | - +-------+ +--------+ + +-------+ +--------+ + | | | | + | -1 | | -1 | + | | | | + +-------+ +--------+ -INPUT LAYER HIDDEN LAYER OUTPUT LAYER -``` +INPUT LAYER HIDDEN LAYER OUTPUT LAYER -The i-th node in layer l is denoted by N_{i,l}, both i and l starting with 0. The weight -between node i in layer l-1 and node j in layer l is denoted by Wijl. Layer 0 is the input -layer, whereas layer L is the output layer. -The ANN also implements bias units. These are nodes that always output the value -1. The bias -units are in all layers except the output layer. They act similar to other nodes, but do not -have input. +The nodes '$X_0$' to '$X_{I-1}$' are the '$I$' input nodes. The nodes '$H_0$' to '$H_{J-1}$' are the '$J$' hidden nodes and the nodes '$O_0$' to '$O_{K-1}$' are the '$K$' output nodes. Between each input node '$X_i$' and hidden node '$H_j$' there is a weight '$V_{ij}$'. Likewise, between each hidden node '$H_j$' and each output node '$O_k$' is a weight '$W_{jk}$'. -The "hiddenLayersTopology" array is converted into the "topology" array by adding the number of -input nodes in front, and the number of output nodes at the end. +The ANN also implements two bias units. These are nodes that always output the value -1. The bias units are in the input and in the hidden layer. They act as normal nodes, except that the bias unit in the hidden layer has no input. The bias units can also be denoted by '$X_I$' and '$H_J$'. -The value of node N_{j,l} is calculated as follows: +The value of a hidden node '$H_j$' is calculated as follows: -`$N_{j,l} = g( \sum_{i=0}^{topology_l} W_{i,j,l)*N_{i,l-1} )$` +'$H_j = g ( \sum_{i=0}^{I} X_i*V_{i,j} )$' -Where g is the sigmoid function +Likewise, the value of the output node '$O_k$' is calculated as follows: -`$g(t) = \frac{e^{\beta t} }{1+e^{\beta t}}$` +'$O_k = g( \sum_{j=0}^{J} H_j*W_{j,k} )$' -# LBFGS +Where '$g$' is the sigmod function -MLlib's ANN implementation uses the LBFGS optimisation algorithm for training. It minimises the -following error function: +'$g(t) = \frac{e^{\beta t} }{1+e^{\beta t}}$' -`$E = \sum_{k=0}^{K-1} (N_{k,L} - Y_k)^2$` +and '$\beta' the learning rate. -where Y_k is the target output given inputs N_{0,0} ... N_{I-1,0}. +### Gradient descent -# Implementation Details +Currently, the MLLIB uses gradent descent for training. 
This means that the weights '$V_{ij}$' and '$W_{jk}$' are updated by adding a fraction of the gradient to '$V_{ij}$' and '$W_{jk}$' of the following function: -## The "ArtificialNeuralNetwork" class +'$E = \sum_{k=0}^{K-1} (O_k - Y_k )^2$' -The "ArtificialNeuralNetwork" class has the following constructor: +where '$Y_k$' is the target output given inputs '$X_0$' ... '$X_{I-1}$' -``` -class ArtificialNeuralNetwork private(topology: Array[Int], maxNumIterations: Int, -convergenceTol: Double) -``` +Calculations provide that: -* `topology` is an array of integers indicating then number of nodes per layer. For example, if -"topology" holds (3, 5, 1), it means that there are three input nodes, five nodes in a single -hidden layer and 1 output node. -* `maxNumIterations` indicates the number of iterations after which the LBFGS algorithm must -have stopped. -* `convergenceTol` indicates the acceptable error, and if reached the LBFGS algorithm will -stop. A lower value of "convergenceTol" will give a higher precision. +'$\frac{\partial E}{\partial W_{jk}} = 2 (O_k-Y_k) \cdot H_j \cdot g' \left( \sum_{m=0}^{J} W_{mk} H_m \right)$' -## The "ArtificialNeuralNetwork" object +and -The object "ArtificialNeuralNetwork" is the interface to the "ArtificialNeuralNetwork" class. -The object contains the training function. There are four different instances of the training -function, each for use with different parameters. All take as the first parameter the RDD -"input", which contains pairs of input and output vectors. +'$\frac{\partial E}{\partial V_{ij}} = 2 \sum_{k=0}^{K-1} \left( (O_k - Y_k) \cdot X_i \cdot W_{jk} \cdot g'\left( \sum_{n=0}^{J} W_{nk} H_n \right) g'\left( \sum_{m=0}^{I} V_{mj} X_i \right) \right)$' -* `def train(input: RDD[(Vector, Vector)], hiddenLayersTopology: Array[Int], maxNumIterations: -Int): ArtificialNeuralNetworkModel`: starts training with random initial weights, and a default -convergenceTol=1e-4. -* `def train(input: RDD[(Vector, Vector)], model: ArtificialNeuralNetworkModel, -maxNumIterations: Int): ArtificialNeuralNetworkModel`: resumes training given an earlier -calculated model, and a default convergenceTol=1e-4. -* `def train(input: RDD[(Vector, Vector)], hiddenLayersTopology: Array[Int], maxNumIterations: -Int, convergenceTol: Double): ArtificialNeuralNetworkModel`: starts training with random -initial weights. Allows setting a customised "convergenceTol". -* `def train(input: RDD[(Vector, Vector)], model: ArtificialNeuralNetworkModel, -maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel`: resumes training -given an earlier calculated model. Allows setting a customised "convergenceTol". +The training step consists of the two operations -Notice that the "hiddenLayersTopology" differs from the "topology" array. The -"hiddenLayersTopology" does not include the number of nodes in the input and output layers. The -number of nodes in input and output layers is calculated from the first element of the training -RDD. For example, the "topology" array (3, 5, 7, 1) would have a "hiddenLayersTopology" (5, 7), -the values 3 and 1 are deduced from the training data. The rationale for having these different -arrays is that future methods may have a different mapping between input values and input nodes -or output values and output nodes. +'$V_{ij} = V_{ij} - \epsilon \frac{\partial E}{\partial V_{ij}}$' -## The "ArtificialNeuralNetworkModel" class +and -All training functions return the trained ANN using the class "ArtificialNeuralNetworkModel". 
-This class has the following function: +'$W_{jk} = W_{jk} - \epsilon \frac{\partial E}{\partial W_{jk}}$' -* `predict(testData: Vector): Vector` calculates the output vector given input vector -"testData". -* `predict(testData: RDD[Vector]): RDD[(Vector,Vector)]` returns (input, output) vector pairs, -using input vector pairs in "testData". +where '$\epsilon$' is the step size. -The weights used by "predict" come from the model. +### Implementation Details -## Training +## The 'ParallelANN' class + +The 'ParallelANN' class is the main class of the ANN. This class uses a trait 'ANN', which includes functions for calculating the hidden layer ('computeHidden') and calculation of the output ('computeValues'). The output of 'computeHidden' includes the bias node in the hidden layer, such that it does not need to handle the hidden bias node differently. + +The 'ParallelANN' class has the following constructors: + +'ParallelANN( stepSize, numIterations, miniBatchFraction, noInput, noHidden, noOutput, beta )' +'ParallelANN()': assumes 'stepSize'=1.0, 'numIterations'=100, 'miniBatchFraction'=1.0, 'noInput'=1, 'noHidden'=5, 'noOutput'=1, 'beta'=1.0. +'ParallelANN( noHidden )': as 'ParallelANN()', but allows specification of 'noHidden' +'ParallelANN( noInput, noHidden )': as 'ParallelANN()', but allows specification of number of 'noInput' and 'noHidden' +'ParallelANN( noInput, noHidden, noOutput )': as 'ParallelANN()', but allows specification of 'noInput', 'noHidden' and 'noOutput' + +The number of input nodes '$I$' is stored in the variable 'noInput', the number of hidden nodes '$J$' is stored in 'noHidden' and the number of output nodes '$K$' is stored in 'noOutput'. 'beta' contains the value of '$\beta$' for the sigmoid function. + +The parameters 'stepSize', 'numIterations' and 'miniBatchFraction' are of use for the Statistical Gradient Descent function. + +In addition, it has a single vector 'weights' corresponding to $V_{ij}$ and $W_{jk}$. The mapping of '$V_{ij}$' and '$W_{jk}$' into 'weights' is as follows: -We have chosen to implement the ANN with LBFGS as optimiser function. We compared it with -Statistical Gradient Descent. LBGFS was much faster, but in accordance is also earlier with -overfitting. +'$V_{ij}$' -> 'weights[ i + j*(noInput+1) ]$' -Science has provided many different strategies to train an ANN. Hence it is important that the -optimising functions in MLlib's ANN are interchangeable. A new optimisation strategy can be -implemented by creating a new class descending from ArtificialNeuralNetwork, and replacing the -optimiser, updater and possibly gradient as required. +'$W_{jk}$' -> 'weights[ (noInput+1)*noHidden + j + k*(noHidden+1) ]$' -# Demo and tests +The training function carries the name 'train'. It can take various inputs: -Usage of MLlib's ANN is demonstrated through the "ANNDemo" demo program. The program generates -three functions: +'def train( rdd: RDD[(Vector,Vector)] )': starts a complete new training session and generates a new ANN. +'def train( rdd: RDD[(Vector,Vector)], model: ParallelANNModel )': continues a training session with an existing ANN. +'def train( rdd: RDD[(Vector,Vector)], weights: Vector )': starts a training session using initial weights as indicated by 'weights'. + +The input of the training function is an RDD with (input/output) training pairs, each input and output being stored as a 'Vector'. The training function returns a variable of from class 'ParallelANNModel', as described below. 
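As a usage sketch of the 'train' variants just listed (an editor's illustration, not part of the patch; it assumes the constructors and methods described in this document and an existing `SparkContext` named `sc`):

```scala
import org.apache.spark.mllib.linalg.Vectors

// Four (input, output) training pairs for the XOR function:
// two input nodes, one output node, targets inside [0, 1].
val xorData = sc.parallelize(Seq(
  (Vectors.dense(0.0, 0.0), Vectors.dense(0.0)),
  (Vectors.dense(0.0, 1.0), Vectors.dense(1.0)),
  (Vectors.dense(1.0, 0.0), Vectors.dense(1.0)),
  (Vectors.dense(1.0, 1.0), Vectors.dense(0.0))))

val ann = new ParallelANN(2, 5)          // noInput = 2, noHidden = 5, one output
val model = ann.train(xorData)           // fresh training session
val resumed = ann.train(xorData, model)  // resume from the trained model's weights
val estimate = resumed.predictV(Vectors.dense(1.0, 0.0))
```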
+ +## The 'ParallelANNModel' class + +All information needed for the ANN is stored in the 'ParallelANNModel' class. The training function 'train' from 'ParallelANN' returns an object from the 'ParallelANNModel' class. + +The information in 'parallelANNModel' consist of the weights, the number of input, hidden and output nodes, as well as two functions 'predictPoint' and 'predictPointV'. + +The 'predictPoint' function is used to calculate a single output value as a 'Double'. If the output of the ANN actually is a vector, it returns just the first element of the vector, that is '$O_{0}$'. The output of the 'predictPointV' is of type 'Vector', and returns all '$K$' output values. + +## The 'GeneralizedSteepestDescentAlgorithm' class + +The 'GeneralizedSteepestDescendAlgorithm' class is based on the 'GeneralizedLinearAlgorithm' class. The main difference is that the 'GeneralizedSteepestDescentAlgorithm' is based on output values of type 'Vector', whereas 'GeneralizedLinearAlgorithm' is based of output values of type 'Double'. The new class was needed, because an ANN ideally outputs multiple values, hence a 'Vector'. + +## Training + +Science has provided many different strategies to train an ANN. Hence it is important that the optimising functions in MLLIB's ANN are interchangeable. The ParallelANN class has a variable 'optimizer', which is currently set to a 'GradientDescent' optimising class. The 'GradientDescent' optimising class implements a stochastic gradient descent method, and is also used for other optimisation technologies in Spark. It is expected that other optimising functions will be defined for Spark, and these can be stored in the 'optimizer' variable. + +### Demo/test + +Usage of MLLIB's ANN is demonstrated through the 'TestParallelANN' demo program. The program generates three functions: * f2d: x -> y * f3d: (x,y) -> z * f4d: t -> (x,y,z) -It will calculate approximations of the target functions, and show a graphical representation -of the training set and the results after applying the testing set. - -In addition, there are the following automated tests: - -* "ANN learns XOR function": tests that the ANN can properly approximate an XOR function. -* "Gradient of ANN": tests that the output of the ANN gradient is roughly equal to an -approximated gradient. +When the program is given the Java argument 'graph', it will show a graphical representation of the target function and the latest values. -# Conclusion +### Conclusion -The "ArtificialNeuralNetwork" class implements a Artificial Neural Network (ANN), using the -LBFGS algorithm. It takes as input an RDD of input/output values of type "(Vector,Vector)", and -returns an object of type "ArtificialNeuralNetworkModel" containing the parameters of the -trained ANN. The "ArtificialNeuralNetworkModel" object can also be used to calculate results -after training. +The 'ParallelANN' class implements a Artificial Neural Network (ANN), using the stochastic gradient descent method. It takes as input an RDD of input/output values of type 'Vector', and returns an object of type 'ParallelANNModel' containing the parameters of the trained ANN. The 'ParallelANNModel' object can also be used to calculate results after training. -The training of an ANN can be interrupted and later continued, allowing intermediate inspection -of the results. +The training of an ANN can be interrupted and later continued, allowing intermediate inspection of the results. -A demo program and tests for ANN are provided. +A demo program for ANN is provided. 
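
A minimal usage sketch of the API documented above (illustrative only: it assumes a running SparkContext `sc`, and the data set and parameter values are made up for the example):

```
import org.apache.spark.mllib.linalg.Vectors

// Learn an approximation of y = x^2. Inputs are rescaled to [-1,1] and outputs
// to [0,1], since the sigmoid output layer can only produce values in (0,1).
val trainingRDD = sc.parallelize(-10 to 10).map { x =>
  (Vectors.dense(x / 10.0), Vectors.dense((x * x) / 100.0))
}

val parallelANN = new ParallelANN(1, 10)      // noInput = 1, noHidden = 10
parallelANN.optimizer.setNumIterations(1000).setStepSize(1.0)

val model = parallelANN.train(trainingRDD)    // returns a ParallelANNModel
val estimate = model.predictPointV(Vectors.dense(0.5), model.weights)
```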
From 402ad79984840c4d188b034c645edbeca0618cd4 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Fri, 22 Aug 2014 15:23:14 +0800 Subject: [PATCH 088/143] Update mllib-ann.md Edit layout to make it more readible --- docs/mllib-ann.md | 65 +++++++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 33 deletions(-) diff --git a/docs/mllib-ann.md b/docs/mllib-ann.md index 1b066d2af38ad..70d35978f8a6c 100644 --- a/docs/mllib-ann.md +++ b/docs/mllib-ann.md @@ -24,7 +24,7 @@ In addition, there is a demo/test available: The file ParallelANN.scala implements a three-layer ANN with the following architecture: - +``` +-------+ | | | X_0 | @@ -66,51 +66,51 @@ The file ParallelANN.scala implements a three-layer ANN with the following archi +-------+ +--------+ INPUT LAYER HIDDEN LAYER OUTPUT LAYER +``` +The nodes X_0 to X_{I-1} are the I input nodes. The nodes H_0 to H_{J-1} are the J hidden nodes and the nodes O_0 to O_{K-1} are the K output nodes. Between each input node X_i and hidden node H_j there is a weight V_{ij}. Likewise, between each hidden node H_j and each output node O_k is a weight W_{jk}. -The nodes '$X_0$' to '$X_{I-1}$' are the '$I$' input nodes. The nodes '$H_0$' to '$H_{J-1}$' are the '$J$' hidden nodes and the nodes '$O_0$' to '$O_{K-1}$' are the '$K$' output nodes. Between each input node '$X_i$' and hidden node '$H_j$' there is a weight '$V_{ij}$'. Likewise, between each hidden node '$H_j$' and each output node '$O_k$' is a weight '$W_{jk}$'. - -The ANN also implements two bias units. These are nodes that always output the value -1. The bias units are in the input and in the hidden layer. They act as normal nodes, except that the bias unit in the hidden layer has no input. The bias units can also be denoted by '$X_I$' and '$H_J$'. +The ANN also implements two bias units. These are nodes that always output the value -1. The bias units are in the input and in the hidden layer. They act as normal nodes, except that the bias unit in the hidden layer has no input. The bias units can also be denoted by X_I and H_J. -The value of a hidden node '$H_j$' is calculated as follows: +The value of a hidden node H_j is calculated as follows: -'$H_j = g ( \sum_{i=0}^{I} X_i*V_{i,j} )$' +`$H_j = g ( \sum_{i=0}^{I} X_i*V_{i,j} )$` -Likewise, the value of the output node '$O_k$' is calculated as follows: +Likewise, the value of the output node O_k is calculated as follows: -'$O_k = g( \sum_{j=0}^{J} H_j*W_{j,k} )$' +`$O_k = g( \sum_{j=0}^{J} H_j*W_{j,k} )$` -Where '$g$' is the sigmod function +Where g is the sigmod function -'$g(t) = \frac{e^{\beta t} }{1+e^{\beta t}}$' +`$g(t) = \frac{e^{\beta t} }{1+e^{\beta t}}$` -and '$\beta' the learning rate. +and `$\beta` the learning rate. ### Gradient descent -Currently, the MLLIB uses gradent descent for training. This means that the weights '$V_{ij}$' and '$W_{jk}$' are updated by adding a fraction of the gradient to '$V_{ij}$' and '$W_{jk}$' of the following function: +Currently, the MLLIB uses gradent descent for training. This means that the weights V_{ij} and W_{jk} are updated by adding a fraction of the gradient to V_{ij} and W_{jk} of the following function: -'$E = \sum_{k=0}^{K-1} (O_k - Y_k )^2$' +`$E = \sum_{k=0}^{K-1} (O_k - Y_k )^2$` -where '$Y_k$' is the target output given inputs '$X_0$' ... '$X_{I-1}$' +where Y_k is the target output given inputs X_0 ... 
X_{I-1} Calculations provide that: -'$\frac{\partial E}{\partial W_{jk}} = 2 (O_k-Y_k) \cdot H_j \cdot g' \left( \sum_{m=0}^{J} W_{mk} H_m \right)$' +`$\frac{\partial E}{\partial W_{jk}} = 2 (O_k-Y_k) \cdot H_j \cdot g' \left( \sum_{m=0}^{J} W_{mk} H_m \right)$` and -'$\frac{\partial E}{\partial V_{ij}} = 2 \sum_{k=0}^{K-1} \left( (O_k - Y_k) \cdot X_i \cdot W_{jk} \cdot g'\left( \sum_{n=0}^{J} W_{nk} H_n \right) g'\left( \sum_{m=0}^{I} V_{mj} X_i \right) \right)$' +`$\frac{\partial E}{\partial V_{ij}} = 2 \sum_{k=0}^{K-1} \left( (O_k - Y_k) \cdot X_i \cdot W_{jk} \cdot g'\left( \sum_{n=0}^{J} W_{nk} H_n \right) g'\left( \sum_{m=0}^{I} V_{mj} X_i \right) \right)$` The training step consists of the two operations -'$V_{ij} = V_{ij} - \epsilon \frac{\partial E}{\partial V_{ij}}$' +`$V_{ij} = V_{ij} - \epsilon \frac{\partial E}{\partial V_{ij}}$` and -'$W_{jk} = W_{jk} - \epsilon \frac{\partial E}{\partial W_{jk}}$' +`$W_{jk} = W_{jk} - \epsilon \frac{\partial E}{\partial W_{jk}}$` -where '$\epsilon$' is the step size. +where `$\epsilon$` is the step size. ### Implementation Details @@ -120,27 +120,26 @@ The 'ParallelANN' class is the main class of the ANN. This class uses a trait 'A The 'ParallelANN' class has the following constructors: -'ParallelANN( stepSize, numIterations, miniBatchFraction, noInput, noHidden, noOutput, beta )' -'ParallelANN()': assumes 'stepSize'=1.0, 'numIterations'=100, 'miniBatchFraction'=1.0, 'noInput'=1, 'noHidden'=5, 'noOutput'=1, 'beta'=1.0. -'ParallelANN( noHidden )': as 'ParallelANN()', but allows specification of 'noHidden' -'ParallelANN( noInput, noHidden )': as 'ParallelANN()', but allows specification of number of 'noInput' and 'noHidden' -'ParallelANN( noInput, noHidden, noOutput )': as 'ParallelANN()', but allows specification of 'noInput', 'noHidden' and 'noOutput' +* `ParallelANN( stepSize, numIterations, miniBatchFraction, noInput, noHidden, noOutput, beta )` +* `ParallelANN()`: assumes 'stepSize'=1.0, 'numIterations'=100, 'miniBatchFraction'=1.0, 'noInput'=1, 'noHidden'=5, noOutput'=1, 'beta'=1.0. +* `ParallelANN( noHidden )`: as 'ParallelANN()', but allows specification of 'noHidden' +* `ParallelANN( noInput, noHidden )`: as 'ParallelANN()', but allows specification of number of 'noInput' and 'noHidden' +* `ParallelANN( noInput, noHidden, noOutput )`: as 'ParallelANN()', but allows specification of 'noInput', 'noHidden' and 'noOutput' -The number of input nodes '$I$' is stored in the variable 'noInput', the number of hidden nodes '$J$' is stored in 'noHidden' and the number of output nodes '$K$' is stored in 'noOutput'. 'beta' contains the value of '$\beta$' for the sigmoid function. +The number of input nodes I is stored in the variable 'noInput', the number of hidden nodes J is stored in 'noHidden' and the number of output nodes K is stored in 'noOutput'. 'beta' contains the value of '$\beta$' for the sigmoid function. The parameters 'stepSize', 'numIterations' and 'miniBatchFraction' are of use for the Statistical Gradient Descent function. -In addition, it has a single vector 'weights' corresponding to $V_{ij}$ and $W_{jk}$. The mapping of '$V_{ij}$' and '$W_{jk}$' into 'weights' is as follows: - -'$V_{ij}$' -> 'weights[ i + j*(noInput+1) ]$' +In addition, it has a single vector 'weights' corresponding to V_{ij} and W_{jk}. 
The mapping of V_{ij} and W_{jk} into 'weights' is as follows: -'$W_{jk}$' -> 'weights[ (noInput+1)*noHidden + j + k*(noHidden+1) ]$' +* V_{ij} -> `weights[ i + j*(noInput+1) ]$` +* W_{jk} -> `weights[ (noInput+1)*noHidden + j + k*(noHidden+1) ]$` The training function carries the name 'train'. It can take various inputs: -'def train( rdd: RDD[(Vector,Vector)] )': starts a complete new training session and generates a new ANN. -'def train( rdd: RDD[(Vector,Vector)], model: ParallelANNModel )': continues a training session with an existing ANN. -'def train( rdd: RDD[(Vector,Vector)], weights: Vector )': starts a training session using initial weights as indicated by 'weights'. +* `def train( rdd: RDD[(Vector,Vector)] )`: starts a complete new training session and generates a new ANN. +* `def train( rdd: RDD[(Vector,Vector)], model: ParallelANNModel )`: continues a training session with an existing ANN. +* `def train( rdd: RDD[(Vector,Vector)], weights: Vector )`: starts a training session using initial weights as indicated by 'weights'. The input of the training function is an RDD with (input/output) training pairs, each input and output being stored as a 'Vector'. The training function returns a variable of from class 'ParallelANNModel', as described below. @@ -150,7 +149,7 @@ All information needed for the ANN is stored in the 'ParallelANNModel' class. Th The information in 'parallelANNModel' consist of the weights, the number of input, hidden and output nodes, as well as two functions 'predictPoint' and 'predictPointV'. -The 'predictPoint' function is used to calculate a single output value as a 'Double'. If the output of the ANN actually is a vector, it returns just the first element of the vector, that is '$O_{0}$'. The output of the 'predictPointV' is of type 'Vector', and returns all '$K$' output values. +The 'predictPoint' function is used to calculate a single output value as a 'Double'. If the output of the ANN actually is a vector, it returns just the first element of the vector, that is O_{0}. The output of the 'predictPointV' is of type 'Vector', and returns all K output values. ## The 'GeneralizedSteepestDescentAlgorithm' class From 07218ebb8c38497dd3ab5f784d4594584614e0d4 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Fri, 22 Aug 2014 15:29:42 +0800 Subject: [PATCH 089/143] Update mllib-ann.md Edited font size headers --- docs/mllib-ann.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/mllib-ann.md b/docs/mllib-ann.md index 70d35978f8a6c..b2b3c91314c7d 100644 --- a/docs/mllib-ann.md +++ b/docs/mllib-ann.md @@ -1,12 +1,12 @@ layout: global -title: Linear Methods - MLlib -displayTitle: MLlib - Linear Methods +title: Artificial Neural Networks - MLlib +displayTitle: MLlib - Artificial Neural Networks --- * Table of contents {:toc} -### Introduction +# Introduction This document describes the MLLIB's Artificial Neural Network (ANN) implementation. @@ -20,7 +20,7 @@ In addition, there is a demo/test available: * 'TestParallelANN.scala': tests parallel ANNs for various functions * 'TestParallelANNgraphics.scala': graphical output for 'TestParallelANN.scala' -### Architecture and Notation +# Architecture and Notation The file ParallelANN.scala implements a three-layer ANN with the following architecture: @@ -86,7 +86,7 @@ Where g is the sigmod function and `$\beta` the learning rate. -### Gradient descent +# Gradient descent Currently, the MLLIB uses gradent descent for training. 
This means that the weights V_{ij} and W_{jk} are updated by adding a fraction of the gradient to V_{ij} and W_{jk} of the following function: @@ -112,7 +112,7 @@ and where `$\epsilon$` is the step size. -### Implementation Details +# Implementation Details ## The 'ParallelANN' class @@ -159,7 +159,7 @@ The 'GeneralizedSteepestDescendAlgorithm' class is based on the 'GeneralizedLine Science has provided many different strategies to train an ANN. Hence it is important that the optimising functions in MLLIB's ANN are interchangeable. The ParallelANN class has a variable 'optimizer', which is currently set to a 'GradientDescent' optimising class. The 'GradientDescent' optimising class implements a stochastic gradient descent method, and is also used for other optimisation technologies in Spark. It is expected that other optimising functions will be defined for Spark, and these can be stored in the 'optimizer' variable. -### Demo/test +# Demo/test Usage of MLLIB's ANN is demonstrated through the 'TestParallelANN' demo program. The program generates three functions: @@ -169,7 +169,7 @@ Usage of MLLIB's ANN is demonstrated through the 'TestParallelANN' demo program. When the program is given the Java argument 'graph', it will show a graphical representation of the target function and the latest values. -### Conclusion +# Conclusion The 'ParallelANN' class implements a Artificial Neural Network (ANN), using the stochastic gradient descent method. It takes as input an RDD of input/output values of type 'Vector', and returns an object of type 'ParallelANNModel' containing the parameters of the trained ANN. The 'ParallelANNModel' object can also be used to calculate results after training. From f7cfa4ee31503151b2a3f95c436782f29c56e939 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Fri, 22 Aug 2014 15:33:15 +0800 Subject: [PATCH 090/143] Update mllib-ann.md --- docs/mllib-ann.md | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/docs/mllib-ann.md b/docs/mllib-ann.md index b2b3c91314c7d..a653724c06997 100644 --- a/docs/mllib-ann.md +++ b/docs/mllib-ann.md @@ -1,14 +1,12 @@ +--- layout: global title: Artificial Neural Networks - MLlib displayTitle: MLlib - Artificial Neural Networks --- -* Table of contents -{:toc} - # Introduction -This document describes the MLLIB's Artificial Neural Network (ANN) implementation. +This document describes the MLlib's Artificial Neural Network (ANN) implementation. The implementation currently consist of the following files: @@ -88,7 +86,7 @@ and `$\beta` the learning rate. # Gradient descent -Currently, the MLLIB uses gradent descent for training. This means that the weights V_{ij} and W_{jk} are updated by adding a fraction of the gradient to V_{ij} and W_{jk} of the following function: +Currently, the MLlib uses gradent descent for training. This means that the weights V_{ij} and W_{jk} are updated by adding a fraction of the gradient to V_{ij} and W_{jk} of the following function: `$E = \sum_{k=0}^{K-1} (O_k - Y_k )^2$` @@ -157,11 +155,11 @@ The 'GeneralizedSteepestDescendAlgorithm' class is based on the 'GeneralizedLine ## Training -Science has provided many different strategies to train an ANN. Hence it is important that the optimising functions in MLLIB's ANN are interchangeable. The ParallelANN class has a variable 'optimizer', which is currently set to a 'GradientDescent' optimising class. 
The 'GradientDescent' optimising class implements a stochastic gradient descent method, and is also used for other optimisation technologies in Spark. It is expected that other optimising functions will be defined for Spark, and these can be stored in the 'optimizer' variable. +Science has provided many different strategies to train an ANN. Hence it is important that the optimising functions in MLlib's ANN are interchangeable. The ParallelANN class has a variable 'optimizer', which is currently set to a 'GradientDescent' optimising class. The 'GradientDescent' optimising class implements a stochastic gradient descent method, and is also used for other optimisation technologies in Spark. It is expected that other optimising functions will be defined for Spark, and these can be stored in the 'optimizer' variable. # Demo/test -Usage of MLLIB's ANN is demonstrated through the 'TestParallelANN' demo program. The program generates three functions: +Usage of MLlib's ANN is demonstrated through the 'TestParallelANN' demo program. The program generates three functions: * f2d: x -> y * f3d: (x,y) -> z From d3211dbd7792ebf329af2699ca6295587db4fc28 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Fri, 22 Aug 2014 15:35:54 +0800 Subject: [PATCH 091/143] Update mllib-ann.md --- docs/mllib-ann.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/mllib-ann.md b/docs/mllib-ann.md index a653724c06997..975a9523be82e 100644 --- a/docs/mllib-ann.md +++ b/docs/mllib-ann.md @@ -147,7 +147,7 @@ All information needed for the ANN is stored in the 'ParallelANNModel' class. Th The information in 'parallelANNModel' consist of the weights, the number of input, hidden and output nodes, as well as two functions 'predictPoint' and 'predictPointV'. -The 'predictPoint' function is used to calculate a single output value as a 'Double'. If the output of the ANN actually is a vector, it returns just the first element of the vector, that is O_{0}. The output of the 'predictPointV' is of type 'Vector', and returns all K output values. +The 'predictPoint' function is used to calculate a single output value as a 'Double'. If the output of the ANN actually is a vector, it returns just the first element of the vector, that is O_0. The output of the 'predictPointV' is of type 'Vector', and returns all K output values. ## The 'GeneralizedSteepestDescentAlgorithm' class From 51ca78bbea98fe43f7d2bf60e89a192b4fec6e8f Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Fri, 22 Aug 2014 15:44:00 +0800 Subject: [PATCH 092/143] Update mllib-ann.md --- docs/mllib-ann.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/mllib-ann.md b/docs/mllib-ann.md index 975a9523be82e..005a5be4987d9 100644 --- a/docs/mllib-ann.md +++ b/docs/mllib-ann.md @@ -82,7 +82,7 @@ Where g is the sigmod function `$g(t) = \frac{e^{\beta t} }{1+e^{\beta t}}$` -and `$\beta` the learning rate. +and `$\beta$` defines the steepness of g. # Gradient descent @@ -124,14 +124,14 @@ The 'ParallelANN' class has the following constructors: * `ParallelANN( noInput, noHidden )`: as 'ParallelANN()', but allows specification of number of 'noInput' and 'noHidden' * `ParallelANN( noInput, noHidden, noOutput )`: as 'ParallelANN()', but allows specification of 'noInput', 'noHidden' and 'noOutput' -The number of input nodes I is stored in the variable 'noInput', the number of hidden nodes J is stored in 'noHidden' and the number of output nodes K is stored in 'noOutput'. 
'beta' contains the value of '$\beta$' for the sigmoid function. +The number of input nodes I is stored in the variable 'noInput', the number of hidden nodes J is stored in 'noHidden' and the number of output nodes K is stored in 'noOutput'. 'beta' contains the value of `$\beta$` for the sigmoid function. The parameters 'stepSize', 'numIterations' and 'miniBatchFraction' are of use for the Statistical Gradient Descent function. In addition, it has a single vector 'weights' corresponding to V_{ij} and W_{jk}. The mapping of V_{ij} and W_{jk} into 'weights' is as follows: -* V_{ij} -> `weights[ i + j*(noInput+1) ]$` -* W_{jk} -> `weights[ (noInput+1)*noHidden + j + k*(noHidden+1) ]$` +* V_{ij} -> `weights[ i + j*(noInput+1) ]` +* W_{jk} -> `weights[ (noInput+1)*noHidden + j + k*(noHidden+1) ]` The training function carries the name 'train'. It can take various inputs: From ceaf2f7352f83d0cc93c213ce85ab5b106467436 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Tue, 2 Sep 2014 09:40:30 +0800 Subject: [PATCH 093/143] Update and rename GeneralizedSteepestDescentAlgorithm.scala to GeneralizedModel.scala Make the model more general, as it can be used for algorithms other than steepest descent too. --- ...Algorithm.scala => GeneralizedModel.scala} | 25 ++++++------------- 1 file changed, 7 insertions(+), 18 deletions(-) rename mllib/src/main/scala/org/apache/spark/mllib/ann/{GeneralizedSteepestDescentAlgorithm.scala => GeneralizedModel.scala} (83%) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescentAlgorithm.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedModel.scala similarity index 83% rename from mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescentAlgorithm.scala rename to mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedModel.scala index 3bc499fa10270..ba3e31ae9d6da 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedSteepestDescentAlgorithm.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedModel.scala @@ -29,13 +29,13 @@ import breeze.linalg.{SparseVector => BSV} /** * :: DeveloperApi :: - * GeneralizedSteepestDescendModel represents a model trained using - * GeneralizedSteepestDescendAlgorithm. + * GeneralizedModel represents a model trained using + * GeneralizedAlgorithm. * * @param weights Weights computed for every feature. */ @DeveloperApi -abstract class GeneralizedSteepestDescentModel(val weights: Vector ) +abstract class GeneralizedModel(val weights: Vector ) extends Serializable { @@ -123,12 +123,11 @@ abstract class GeneralizedSteepestDescentModel(val weights: Vector ) /** * :: DeveloperApi :: - * GeneralizedSteepestDescend implements methods to train a function using - * the Steepest Descend algorithm. - * This class should be extended with an Optimizer to create a new GLM. + * GeneralizedAlgorithm implements methods to train a function. + * This class should be extended with an Optimizer to create a new GM. */ @DeveloperApi -abstract class GeneralizedSteepestDescentAlgorithm[M <: GeneralizedSteepestDescentModel] +abstract class GeneralizedAlgorithm[M <: GeneralizedModel] extends Logging with Serializable { /** The optimizer to solve the problem. */ @@ -139,19 +138,9 @@ abstract class GeneralizedSteepestDescentAlgorithm[M <: GeneralizedSteepestDesce */ protected def createModel(weights: Vector): M - /** Prepends one to the input vector. 
*/ - private def prependOne(vector: Vector): Vector = { - val vector1 = vector.toBreeze match { - case dv: BDV[Double] => BDV.vertcat(BDV.ones[Double](1), dv) - case sv: BSV[Double] => BSV.vertcat(new BSV[Double](Array(0), Array(1.0), 1), sv) - case v: Any => throw new IllegalArgumentException("Do not support vector type " + v.getClass) - } - Vectors.fromBreeze(vector1) - } - /** * Run the algorithm with the configured parameters on an input RDD - * of LabeledPoint entries starting from the initial weights provided. + * of (Vector,Vector) entries starting from the initial weights provided. */ def run(input: RDD[(Vector,Vector)], initialWeights: Vector): M = { From 6f79c9678c8c0d85909421fdb04ceec0f880bb33 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Tue, 2 Sep 2014 09:48:20 +0800 Subject: [PATCH 094/143] Update ParallelANN.scala Updated the code to implement true back-propagation Thanks to Alexander Ulanov (avulanov) for implementing true back-propagation in his repository first. This code borrows extensively from his code, and uses the same back-propagation algorithm (save for using arrays rather than matrices/vectors) and "layers" vector (here called "ontology"). --- .../apache/spark/mllib/ann/ParallelANN.scala | 532 ++++++++++-------- 1 file changed, 301 insertions(+), 231 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala index d1a31f2c598a3..789854da4d32a 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala @@ -34,159 +34,168 @@ import org.apache.spark.util.random.XORShiftRandom /* * Implements a Artificial Neural Network (ANN) * - * format of data: - * data[ 0..noInput-1 ]: Input - * data[ noInput..noInput+noOutput-1 ]: Output + * The data consists of an input vector and an output vector, combined into a single vector + * as follows: + * + * [ ---input--- ---output--- ] + * + * NOTE: output values should be in the range [0,1] + * + * For a network of L layers: + * + * topology( l ) indicates the number of nodes in layer l, excluding the bias node. + * + * noInput = topology(0), the number of input nodes + * noOutput = topology(L-1), the number of output nodes + * + * input = data( 0 to noInput-1 ) + * output = data( noInput to noInput+noOutput-1 ) + * + * W_ijl is the weight from node i in layer l-1 to node j in layer l + * W_ijl goes to position ofsWeight(l) + j*(topology(l-1)+1) + i in the weights vector + * + * B_jl is the bias input of node j in layer l + * B_jl goes to position ofsWeight(l) + j*(topology(l-1)+1) + topology(l-1) in the weights vector + * + * error function: E( O, Y ) = sum( O_j - Y_j ) + * (with O = (O_0, ..., O_(noOutput-1)) the output of the ANN, + * and (Y_0, ..., Y_(noOutput-1)) the input) + * + * node_jl is node j in layer l + * node_jl goes to position ofsNode(l) + j + * + * The weights gradient is defined as dE/dW_ijl and dE/dB_jl + * It has same mapping as W_ijl and B_jl + * + * For back propagation: + * delta_jl = dE/dS_jl, where S_jl the output of node_jl, but before applying the sigmoid + * delta_jl has the same mapping as node_jl + * + * Where E = ((estOutput-output),(estOutput-output)), + * the inner product of the difference between estimation and target output with itself. 
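 *
 * As a worked example of the weight layout above, take topology = (2, 3, 1):
 * ofsWeight(1) = 0 and ofsWeight(2) = (2+1)*3 = 9, so the weights vector first
 * holds the (2+1)*3 = 9 weights and biases of the hidden layer, followed by the
 * (3+1)*1 = 4 weights and bias of the output layer, giving 13 values in total.
 * For instance, the bias B_01 of hidden node 0 sits at position 0 + 0*(2+1) + 2 = 2.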
* */ -trait ANN { - - def noInput: Integer - def noHidden: Integer - def noOutput: Integer - def beta: Double - - def g( x: Double ) = (1/(1 + math.exp(-beta*x))) - def dg( x: Double ) = beta*g(x)*(1 - g(x)) +class ParallelANNModel private[mllib] ( + override val weights: Vector, + val topology: Array[Int] ) + extends GeneralizedModel(weights) with RegressionModel with Serializable { - /* returns the hidden layer including the -1 robonode! */ - def computeHidden( data: Array[Double], weights: Array[Double] ): Array[Double] = { + val L = topology.length-1 - var arrHidden = new Array[Double]( noHidden + 1 ) + val ofsWeight: Array[Int] = { - for( j <- 0 to noHidden-1 ) { + var tmp = new Array[Int]( L + 1 ) - val start = j*(noInput + 1) - var v: Double = 0; - for( w <- 0 to noInput-1 ) - v = v + data(w)*weights( start + w ) - v = v - 1.0 * weights( start + noInput ) // robonode - arrHidden( j ) = g( v ) + var curPos = 0; + tmp( 0 ) = 0; + for( l <- 1 to L ) { + tmp( l ) = curPos + curPos = curPos + ( topology( l - 1 ) + 1 ) * ( topology( l ) ) } - arrHidden( noHidden ) = -1.0 - - arrHidden + tmp } - /* returns the hidden layer including the -1 robonode, as well as the final estimation */ - def computeValues( - data: Array[Double], - weights: Array[Double] ): - (Array[Double], Array[Double]) = { + def g( x: Double ) = 1.0 / (1.0 + math.exp( -x ) ) + + def computeValues( arrData: Array[Double], arrWeights: Array[Double] ): Array[Double] = { - var hidden = computeHidden( data, weights ) - var output = new Array[Double](noOutput) + var arrPrev = new Array[Double]( topology( 0 ) ) - for( k <- 0 to noOutput - 1 ) { - var tmp: Double = 0.0; - for( i <- 0 to noHidden ) - tmp = tmp + hidden(i)*weights( noHidden * ( noInput + 1 ) + k * ( noHidden + 1 ) + i ) - output(k) = g( tmp ) + for( i <- 0 until topology( 0 ) ) + arrPrev( i ) = arrData( i ) + for( l <- 1 to L ) { + val arrCur = new Array[Double]( topology( l ) ) + for( j <- 0 until topology( l ) ) { + var cum: Double = 0.0 + for( i <-0 until topology( l-1 ) ) + cum = cum + + arrPrev( i ) * arrWeights( ofsWeight( l ) + ( topology( l-1 ) + 1 ) * j + i ) + cum = cum + + arrWeights( ofsWeight( l ) + ( topology( l-1 ) + 1 )*j + topology( l-1 ) ) // bias + arrCur( j ) = g( cum ) + } + arrPrev = arrCur; } - ( hidden, output ) + arrPrev } -} - -class ParallelANNModel private[mllib] ( - override val weights: Vector, - val noInp: Integer, - val noHid: Integer, - val noOut: Integer, - val b: Double ) - extends GeneralizedSteepestDescentModel(weights) with RegressionModel with Serializable with ANN { - - val noInput = noInp - val noHidden = noHid - val noOutput = noOut - val beta = b - override def predictPoint( data: Vector, weights: Vector ): Double = { - val outp = computeValues( data.toArray, weights.toArray )._2 + val outp = computeValues( data.toArray, weights.toArray ) outp(0) } def predictPointV( data: Vector, weights: Vector): Vector = { - Vectors.dense( computeValues( data.toArray, weights.toArray )._2 ) + Vectors.dense( computeValues( data.toArray, weights.toArray ) ) } } -class ParallelANN private ( - private var stepSize: Double, +class ParallelANN( + private var topology: Array[Int], private var numIterations: Int, - private var miniBatchFraction: Double, - private var noInput: Int, - private var noHidden: Int, - private var noOutput: Int, - private val beta: Double ) - extends GeneralizedSteepestDescentAlgorithm[ParallelANNModel] with Serializable { + private var stepSize: Double, + private var miniBatchFraction: Double ) + extends 
GeneralizedAlgorithm[ParallelANNModel] with Serializable { private val rand = new XORShiftRandom - private val gradient = new LeastSquaresGradientANN( noInput, noHidden, noOutput, beta ) + private val gradient = new LeastSquaresGradientANN( topology ) private val updater = new ANNUpdater() override val optimizer = new GradientDescent(gradient, updater) .setStepSize(stepSize) .setNumIterations(numIterations) .setMiniBatchFraction(miniBatchFraction) - def this() = { - this( 1.0, 100, 1.0, 1, 5, 1, 1.0 ) - } + val noWeights = { - def this( noHidden: Int ) = { - this( 1.0, 100, 1.0, 1, noHidden, 1, 1.0 ) - } + var tmp = 0 - def this( noInput: Int, noHidden: Int ) = { - this( 1.0, 100, 1.0, noInput, noHidden, 1, 1.0 ) - } + for( i<-1 until topology.size ) { + tmp = tmp + topology(i) * (topology(i-1) + 1) + } + + tmp - def this( noInput: Int, noHidden: Int, noOutput: Int ) = { - this( 1.0, 100, 1.0, noInput, noHidden, noOutput, 1.0 ) } - override protected def createModel(weights: Vector) = { - new ParallelANNModel( weights, noInput, noHidden, noOutput, beta ) + def this( topology: Array[Int] ) = { + this( topology, 100, 1.0, 1.0 ) } - def checkOutput( rdd: RDD[(Vector,Vector)] ) { - val oVals = rdd.flatMap( T => T._2.toArray ) - var omax = oVals.max - assert( omax <= 1 ) - var omin = oVals.min - assert( omin >= 0 ) + def this( noInput: Int, noHidden: Int, noOutput: Int ) = { + this( Array( noInput, noHidden, noOutput ) ) } - def randomDouble( i: Int ): Double = { - rand.nextDouble() + override protected def createModel( weights: Vector ) = { + new ParallelANNModel( weights, topology ) } def train( rdd: RDD[(Vector,Vector)] ): ParallelANNModel = { val ft = rdd.first() - assert( noInput == ft._1.size ) - assert( noOutput == ft._2.size ) + assert( topology( 0 ) == ft._1.size ) + assert( topology( topology.length-1 ) == ft._2.size ) - checkOutput( rdd ) + val initialWeightsArr = new Array[Double](noWeights) - val noWeights = (noInput + 1)*noHidden + (noHidden + 1)*noOutput + var pos = 0; - val initialWeightsArr = new Array[Double](noWeights) + for( l <- 1 until topology.length ) { + for( i <- 0 until ( topology( l ) * ( topology( l - 1 ) + 1 ) ) ) { + initialWeightsArr( pos ) = ( rand.nextDouble * 4.8 - 2.4 ) / ( topology( l - 1 ) + 1) + pos = pos + 1; + } + } - for( i <- 0 to (noInput + 1)*noHidden - 1 ) - initialWeightsArr( i ) = (randomDouble(i)*4.8 - 2.4)/(noInput + 1) - for( i <- 0 to (noHidden + 1)*noOutput - 1 ) - initialWeightsArr( (noInput + 1)*noHidden + i ) = (randomDouble(i)*4.8 - 2.4)/(noHidden + 1) + assert( pos == noWeights ) val initialWeights = Vectors.dense( initialWeightsArr ) @@ -195,190 +204,255 @@ class ParallelANN private ( } def train( rdd: RDD[(Vector,Vector)], model: ParallelANNModel ): ParallelANNModel = { + run( rdd, model.weights ) + } def train( rdd: RDD[(Vector,Vector)], weights: Vector ): ParallelANNModel = { val ft = rdd.first() - assert( noInput == ft._1.size ) - assert( noOutput == ft._2.size ) - assert( weights.size == (noInput + 1) * noHidden + (noHidden + 1) * noOutput ) + assert( weights.size == noWeights ) run( rdd, weights ); } } -/** - * data consists of input vector and output vector, and has the following form: - * - * [ ---input--- ---output--- ] - * - * where input = data( 0 to noInput-1 ) and output = data( noInput to noInput+noOutput-1 ) - * - * V_ij is the weight from input node i to hidden node j - * W_jk is the weight from hidden node j to output node k - * - * The weights have the following mapping: - * - * V_ij goes to position i + j*(noInput+1) 
- * W_jk goes to position (noInput+1)*noHidden + j + k*(noHidden+1) - * - * Gradient has same mapping, i.e. - * dE/dVij goes to i + j*(noInput+1) - * dE/dWjk goes to (noInput+1)*noHidden + j +k*(noHidden+1) - * - * Where E = ((estOutput-output),(estOutput-output)), - * the inner product of the difference between estimation and target output with itself. - */ - class LeastSquaresGradientANN( - noInp: Integer, - noHid: Integer, - noOut: Integer, - b: Double ) - extends Gradient with ANN { + topology: Array[Int] ) + extends Gradient { - val noInput = noInp - val noHidden = noHid - val noOutput = noOut - val beta = b + def g( x: Double ) = 1.0 / (1.0 + math.exp( -x ) ) - /* For verification only - private val rand = new XORShiftRandom - */ + val L = topology.length-1 - override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { + val noWeights = { - val arrData = data.toArray - val arrWeights = weights.toArray + var tmp = 0 + + for( i<-1 to L ) { + tmp = tmp + topology(i) * ( topology( i - 1 ) + 1 ) + } + + tmp + + } - var gradient = new Array[Double]( (noInput + 1) * noHidden + (noHidden + 1) * noOutput ) + val ofsWeight: Array[Int] = { - val (arrHidden, output) = computeValues( arrData, arrWeights ) - val arrEst = output + var tmp = new Array[Int]( L + 1 ) + var curPos = 0; - var diff = new Array[Double]( noOutput ) - var E: Double = 0.0 - for( i <-0 to noOutput-1 ) { - diff( i ) = arrEst( i ) - arrData( noInput.toInt + i ); - E = E + diff(i) * diff(i) + tmp( 0 ) = 0; + for( l <- 1 to L ) { + tmp( l ) = curPos + curPos = curPos + ( topology( l - 1 ) + 1 ) * ( topology( l ) ) } - /* - * The following fields are for verification only - val eps = .000001 - val testOneVOutOf = 5000; - val testOneWOutOf = 2500; - var arrWeights_tmp = weights.toArray - val warnErr = 5e-7 - */ + tmp + + } - /* Wjk */ - for( k <- 0 to noOutput - 1 ) { + val noNodes: Int = { - var start = noHidden*(noInput + 1) + k*(noHidden + 1) - var sum_l: Double = 0 - for( w <- 0 to noHidden ) - sum_l = sum_l + arrHidden( w ) * arrWeights( w + start ) - val dg_sum_l = dg( sum_l ) + var tmp: Integer = 0 + for( l <-0 until topology.size ) { + tmp = tmp + topology( l ) + } - for( j <- 0 to noHidden ) { + tmp - gradient( noHidden*(noInput + 1) + k*(noHidden + 1) + j ) - = 2*(diff(k))*dg_sum_l*arrHidden(j) + } - /* - * The following is for verification only - if( rand.nextInt % (testOneWOutOf>>1) == 0 ) { - arrWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) - = arrWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) + eps - val est2 = computeValues( arrData, arrWeights_tmp )._2 - var E2: Double = 0.0; - for( w <- 0 to noOutput-1 ) { - val diff2 = est2(w)-data( noInput+w ) - E2 = E2 + diff2*diff2 - } - val d = ( E2 - E ) / eps - val compErr = math.abs( gradient( noHidden*(noInput+1)+k*(noHidden+1)+j) - d ) - if( compErr > warnErr ) { - println( "!!! 
Calc/Est Wjk: " + - ( ( gradient( noHidden*(noInput+1)+k*(noHidden+1)+j), d ), compErr ) ) - } - arrWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) - = arrWeights_tmp( noHidden*(noInput+1)+k*(noHidden+1)+j ) - eps - } - */ + val ofsNode: Array[Int] = { + var tmp = new Array[Int]( L + 1 ) + tmp( 0 ) = 0 + + for( l <-1 to L ) { + tmp( l ) = tmp( l - 1 ) + topology( l - 1 ) + } + + tmp + + } + + /* For verification only + def calcErr( arrData: Array[Double], arrWeights: Array[Double] ): Double = { + + var arrPrev = new Array[Double]( topology( 0 ) ) + + for( i <- 0 until topology( 0 ) ) + arrPrev( i ) = arrData( i ) + + for( l <- 1 to L ) { + val arrCur = new Array[Double]( topology( l ) ) + for( j <- 0 until topology( l ) ) { + var cum: Double = 0.0 + for( i <-0 until topology( l-1 ) ) { + cum = cum + + arrPrev( i ) * arrWeights( ofsWeight( l ) + ( topology( l-1 ) + 1 ) * j + i ) + } + cum = cum + + arrWeights( ofsWeight( l ) + ( topology( l-1 ) + 1 )*j + topology( l-1 ) ) // bias + arrCur( j ) = g( cum ) } + arrPrev = arrCur; + } + + val arrDiff = new Array[Double]( topology( L ) ) + for( j <- 0 until topology( L ) ) { + arrDiff( j ) = ( arrPrev( j ) - arrData( topology(0) + j ) ) + } + + var err: Double = 0; + for( j <-0 until topology( L ) ) { + err = err + arrDiff( j )*arrDiff( j ) + } + + err*.5 + } + */ + + override def compute( data: Vector, label: Double, weights: Vector ): ( Vector, Double ) = { + + val arrData = data.toArray + val arrWeights = weights.toArray + val arrNodes = new Array[Double]( noNodes ) + + /* + * nodes + */ + for( i <- 0 until topology( 0 ) ) { + arrNodes( i ) = arrData( i ) } - var start = noHidden * (noInput + 1) - var sum_n1: Double = 0 - for( w <- 0 to noHidden ) - sum_n1 = sum_n1 + arrHidden( w )*arrWeights( w + start ) - val dg_sum_n1 = dg( sum_n1 ) + for( l <- 1 to L ) { + for( j <- 0 until topology( l ) ) { + var cum: Double = 0.0; + for( i <- 0 until topology( l-1 ) ) { + cum = cum + + arrWeights( ofsWeight( l ) + ( topology( l-1 ) + 1 ) * j + i ) * + arrNodes( ofsNode( l-1 ) + i ) + } + cum = cum + arrWeights( ofsWeight( l ) + ( topology( l-1 ) + 1 )*j + topology( l-1 ) ) + arrNodes( ofsNode( l ) + j ) = g( cum ) + } + } + val arrDiff = new Array[Double]( topology( L ) ) + for( j <- 0 until topology( L ) ) { + arrDiff( j ) = ( arrNodes( ofsNode( L ) + j ) - arrData( topology(0) + j ) ) + } - /* Vij */ - for( j <- 0 to noHidden - 1 ) { /* the hidden robonode has no associated Vij */ + var err: Double = 0; + for( j <-0 until topology( L ) ) { + err = err + arrDiff( j )*arrDiff( j ) + } + err = err*.5 - start = j * ( noInput + 1 ) - var sum_n2: Double = 0 - for( w <- 0 to noInput-1 ) // non-robonodes - sum_n2 = sum_n2 + arrData( w )*arrWeights( w + start) - sum_n2 = sum_n2 - arrWeights( noInput + start) // robonode - val dg_sum_n2 = dg( sum_n2 ) + /* + * back propagation + */ - for( i <- 0 to noInput ) { + val arrDelta = new Array[Double]( noNodes ) + for( j <- 0 until topology( L ) ) { + arrDelta( ofsNode( L ) + j ) = + arrDiff( j ) * + arrNodes( ofsNode( L ) + j ) * ( 1 - arrNodes( ofsNode( L ) + j ) ) + } - for( k<- 0 to noOutput - 1 ) { + for( l <- L-1 until 0 by -1 ) { + for( j <- 0 until topology( l ) ) { + var cum: Double = 0.0 + for( i <- 0 until topology( l + 1 ) ) { + cum = cum + + arrWeights( ofsWeight( l + 1 ) + ( topology( l ) + 1 ) * i + j ) * + arrDelta( ofsNode( l + 1 ) + i ) * + arrNodes( ofsNode( l ) + j ) * ( 1 - arrNodes( ofsNode( l ) + j ) ) + } + arrDelta( ofsNode( l ) + j ) = cum + } + } - if( i + println( (dE, arrGrad( 
ofsWeight( l ) + + ( topology( l - 1 ) + 1 ) * j + i ), errGrad ) ) } - + */ } - /* - * The following is for verification only - if( rand.nextInt % (testOneVOutOf>>1) == 0 ) { - arrWeights_tmp( i+j*(noInput+1) ) = arrWeights_tmp( i+j*(noInput+1) ) + eps - val est2 = computeValues( arrData, arrWeights_tmp )._2 + arrGrad( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ) = + arrDelta( ofsNode( l ) + j ) - var E2: Double = 0.0; - for( w <- 0 to noOutput-1 ) { - val diff2 = est2(w)-data( noInput+w ) - E2 = E2 + diff2*diff2 - } + /* for verification only + val tmpErr0 = calcErr( arrData, arrWcopy ) + arrWcopy( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ) = + arrWcopy( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ) + eps + val tmpErr1 = calcErr( arrData, arrWcopy ) + arrWcopy( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ) = + arrWcopy( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ) - eps + val dE = ( tmpErr1 - tmpErr0 ) / eps - val d = ( E2 - E ) / eps - val compErr = math.abs( gradient( i+j*(noInput+1) )-d ) - if( compErr>warnErr ) - println( "!!! Calc/Est Vij: "+ ( ( gradient( i+j*(noInput+1) ), d ), compErr ) ) - arrWeights_tmp( i+j*(noInput+1) ) = arrWeights_tmp( i+j*(noInput+1) ) - eps + val errGrad = math.abs( dE - + arrGrad( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ) ) + + try { + assert( errGrad < errGradAccept ) + } + catch { + case e: AssertionError => + println( (dE, + arrGrad( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ), + errGrad ) ) } */ + } } - (Vectors.dense(gradient), E) + ( Vectors.dense( arrGrad ), err ) } @@ -389,9 +463,7 @@ class LeastSquaresGradientANN( cumGradient: Vector): Double = { val (grad, err) = compute( data, label, weights ) - cumGradient.toBreeze += grad.toBreeze - return err } @@ -407,12 +479,10 @@ class ANNUpdater extends Updater { regParam: Double): (Vector, Double) = { val thisIterStepSize = stepSize - val brzWeights: BV[Double] = weightsOld.toBreeze.toDenseVector - brzAxpy(-thisIterStepSize, gradient.toBreeze, brzWeights) - (Vectors.fromBreeze(brzWeights), 0) + } } From 29727473b72714f19d53b271179916a62291c7e1 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Tue, 2 Sep 2014 09:50:00 +0800 Subject: [PATCH 095/143] Update TestParallelANN.scala Updated to new interface using the "topology" vector. 
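For example, a network with one input node, two hidden layers of three nodes each and one output node is now constructed as `new ParallelANN( Array[Int]( 1, 3, 3, 1 ) )`, instead of passing the individual node counts.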
--- .../spark/mllib/ann/TestParallelANN.scala | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala index b37c8a493dd2b..a097df0c5f520 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala @@ -127,9 +127,9 @@ object TestParallelANN { def main( arg: Array[String] ) { println( "Parallel ANN tester" ) + println - val formatter = new SimpleDateFormat("hh:mm:ss") - val starttime = Calendar.getInstance().getTime() + val formatter = new SimpleDateFormat("hh:mm:ss") var curAngle: Double = 0.0 var graphic: Boolean = false @@ -158,12 +158,16 @@ object TestParallelANN { var A = 20.0 var B = 50.0 - var conf = new SparkConf().setAppName("Parallel ANN").setMaster("local[5]") + var conf = new SparkConf().setAppName("Parallel ANN").setMaster("local[1]") var sc = new SparkContext(conf) - val testRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache() + val testRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache val testRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 200 ), 2).cache val testRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache + + val validationRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache + val validationRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache + val validationRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache if( graphic ) { @@ -173,26 +177,25 @@ object TestParallelANN { } - val parallelANN2D = new ParallelANN( 1, 10 ) + val parallelANN2D = new ParallelANN( Array[Int]( 1, 3, 3, 1 ) ) parallelANN2D.optimizer.setNumIterations(1000).setStepSize( 1.0 ) - val parallelANN3D = new ParallelANN( 2, 20 ) + val parallelANN3D = new ParallelANN( Array[Int]( 2, 20, 1 ) ) parallelANN3D.optimizer.setNumIterations(1000).setStepSize( 1.0 ) - val parallelANN4D = new ParallelANN( 1, 20, 3 ) + val parallelANN4D = new ParallelANN( Array[Int]( 1, 20, 3 ) ) parallelANN4D.optimizer.setNumIterations( 1000 ).setStepSize( 1.0 ) + + val starttime = Calendar.getInstance().getTime() + println( "Start training " + starttime ) var model2D = parallelANN2D.train( testRDD2D ) var model3D = parallelANN3D.train( testRDD3D ) var model4D = parallelANN4D.train( testRDD4D ) - val noIt = 20 + val noIt = 1500 var errHist = new Array[(Int,Double,Double,Double)]( noIt ) - val validationRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache - val validationRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache - val validationRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache - for( i <- 0 to noIt - 1 ) { val predictedAndTarget2D = validationRDD2D.map( T => ( T._1, T._2, model2D.predictV( T._1 ) ) ) @@ -247,10 +250,7 @@ object TestParallelANN { } - val now = Calendar.getInstance().getTime() - val times = formatter.format( now ); - - println( "It. "+i+" ("+times+"), Error 2D/3D/4D: " + (err2D, err3D, err4D) ) + println( "It. 
"+i+" ("+Calendar.getInstance().getTime()+"), Error 2D/3D/4D: " + (err2D, err3D, err4D) ) errHist(i) = ( i, err2D, err3D, err4D ) if( i < noIt - 1 ) { From 6740981d59e9cbf477f373948cc7ab0738b07cc4 Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Fri, 5 Sep 2014 18:11:21 +0400 Subject: [PATCH 096/143] ANN test suite: learning XOR function --- .../org/apache/spark/mllib/ann/ANNSuite.scala | 118 +++--------------- 1 file changed, 14 insertions(+), 104 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala index d95846d97c3b7..ff6eb51377d78 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala @@ -1,119 +1,29 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - package org.apache.spark.mllib.ann import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.util.LocalSparkContext -import org.apache.spark.util.random.XORShiftRandom - import org.scalatest.FunSuite class ANNSuite extends FunSuite with LocalSparkContext { - + private val inputs = Array[Array[Double]]( + Array[Double](0,0), + Array[Double](0,1), + Array[Double](1,0), + Array[Double](1,1) + ) + private val outputs = Array[Double](0, 1, 1, 0) + private val inputSize = 2 + private val hiddenSize = 5 + private val outputSize = 1 test("ANN learns XOR function") { - val inputs = Array[Array[Double]]( - Array[Double](0,0), - Array[Double](0,1), - Array[Double](1,0), - Array[Double](1,1) - ) - val outputs = Array[Double](0, 1, 1, 0) val data = inputs.zip(outputs).map { case(features, label) => (Vectors.dense(features), Vectors.dense(Array(label)))} val rddData = sc.parallelize(data, 2) - val hiddenLayersTopology = Array[Int](5) - val initialWeights = ArtificialNeuralNetwork.randomWeights(rddData, hiddenLayersTopology, 0x01234567) - val model = ArtificialNeuralNetwork.train(rddData, hiddenLayersTopology, initialWeights, 200) + val ann = new ParallelANN(Array[Int](inputSize, hiddenSize, outputSize)) + ann.optimizer.setNumIterations(2000).setStepSize(2.0) + val model = ann.train(rddData) val predictionAndLabels = rddData.map { case(input, label) => - (model.predict(input)(0), label(0)) }.collect() + (model.predictV(input)(0), label(0)) }.collect() assert(predictionAndLabels.forall { case(p, l) => (math.round(p) - l) == 0 }) } - - test("Gradient of ANN") { - val eps = 1e-6 - val accept = 1e-7 - val topologyArr = Array[Array[Int]]( - Array[Int](1, 5, 1), - Array[Int](5, 10, 5, 3), - Array[Int](128, 256, 128) - ) - val rnd = new XORShiftRandom(0) - var cnt = 0 - while( cnt Date: Tue, 9 Sep 2014 12:34:53 +0400 Subject: [PATCH 097/143] Removing dependency on GeneralizedModel and Algorithm --- 
.../apache/spark/mllib/ann/ParallelANN.scala | 79 +++++++++++++------ 1 file changed, 56 insertions(+), 23 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala index 789854da4d32a..a3a832844fb3e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala @@ -28,7 +28,7 @@ import breeze.linalg.{axpy => brzAxpy, Vector => BV} import breeze.linalg.{Vector => BV} import breeze.linalg.{axpy => brzAxpy} import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.mllib.regression.RegressionModel +import org.apache.spark.mllib.regression.{LabeledPoint, RegressionModel} import org.apache.spark.util.random.XORShiftRandom /* @@ -77,29 +77,24 @@ import org.apache.spark.util.random.XORShiftRandom */ class ParallelANNModel private[mllib] ( - override val weights: Vector, + val weights: Vector, val topology: Array[Int] ) - extends GeneralizedModel(weights) with RegressionModel with Serializable { + extends Serializable { - val L = topology.length-1 - - val ofsWeight: Array[Int] = { - - var tmp = new Array[Int]( L + 1 ) + private val L = topology.length - 1 + private val ofsWeight: Array[Int] = { + val tmp = new Array[Int](L + 1) var curPos = 0; - tmp( 0 ) = 0; for( l <- 1 to L ) { tmp( l ) = curPos curPos = curPos + ( topology( l - 1 ) + 1 ) * ( topology( l ) ) } - tmp - } - def g( x: Double ) = 1.0 / (1.0 + math.exp( -x ) ) + private def g( x: Double ) = 1.0 / (1.0 + math.exp( -x ) ) def computeValues( arrData: Array[Double], arrWeights: Array[Double] ): Array[Double] = { @@ -126,7 +121,7 @@ class ParallelANNModel private[mllib] ( } - override def predictPoint( data: Vector, weights: Vector ): Double = { + def predictPoint( data: Vector, weights: Vector ): Double = { val outp = computeValues( data.toArray, weights.toArray ) outp(0) } @@ -135,6 +130,21 @@ class ParallelANNModel private[mllib] ( Vectors.dense( computeValues( data.toArray, weights.toArray ) ) } + /** + * Predict values for a single data point using the model trained. + * + * @param testData array representing a single data point + * @return Vector prediction from the trained model + * + * Returns the complete vector. 
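+ *
+ * For example, for a model with topology (2, 5, 1), predictV(Vectors.dense(0.0, 1.0))
+ * would return a Vector with a single element, the estimated output O_0.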
+ */ + def predictV( testData: Vector ): Vector = { + + predictPointV( testData, weights ) + + } + + } class ParallelANN( @@ -142,13 +152,13 @@ class ParallelANN( private var numIterations: Int, private var stepSize: Double, private var miniBatchFraction: Double ) - extends GeneralizedAlgorithm[ParallelANNModel] with Serializable { + extends Serializable { private val rand = new XORShiftRandom private val gradient = new LeastSquaresGradientANN( topology ) private val updater = new ANNUpdater() - override val optimizer = new GradientDescent(gradient, updater) + val optimizer = new GradientDescent(gradient, updater) .setStepSize(stepSize) .setNumIterations(numIterations) .setMiniBatchFraction(miniBatchFraction) @@ -173,7 +183,7 @@ class ParallelANN( this( Array( noInput, noHidden, noOutput ) ) } - override protected def createModel( weights: Vector ) = { + protected def createModel( weights: Vector ) = { new ParallelANNModel( weights, topology ) } @@ -217,26 +227,49 @@ class ParallelANN( } + private def run(input: RDD[(Vector,Vector)], initialWeights: Vector): ParallelANNModel = { + + val data = input.map( v => ( + (0.0).toDouble, + Vectors.fromBreeze( DenseVector.vertcat( + v._1.toBreeze.toDenseVector, + v._2.toBreeze.toDenseVector ) ) + ) ) + val weights = optimizer.optimize(data, initialWeights) + createModel( weights ) + } + +} + +object ParallelANN { + + def train( + input: RDD[(Vector,Vector)], + numIterations: Int, + stepSize: Double, + regParam: Double, + miniBatchFraction: Double, + initialWeights: Vector): ParallelANNModel = { + null + } + } + class LeastSquaresGradientANN( topology: Array[Int] ) extends Gradient { - def g( x: Double ) = 1.0 / (1.0 + math.exp( -x ) ) - - val L = topology.length-1 + private def g( x: Double ) = 1.0 / (1.0 + math.exp( -x ) ) - val noWeights = { + private val L = topology.length - 1 + private val noWeights = { var tmp = 0 - for( i<-1 to L ) { tmp = tmp + topology(i) * ( topology( i - 1 ) + 1 ) } - tmp - } val ofsWeight: Array[Int] = { From d320d764598e1762db5f671550a8c41c4665124e Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Tue, 9 Sep 2014 17:58:47 +0400 Subject: [PATCH 098/143] Addressing reviewers comments: interface refactoring --- .../mllib/ann/ArtificialNeuralNetwork.scala | 479 +++++----------- .../spark/mllib/ann/GeneralizedModel.scala | 158 ------ .../apache/spark/mllib/ann/ParallelANN.scala | 521 ------------------ .../org/apache/spark/mllib/ann/ANNSuite.scala | 5 +- .../spark/mllib/ann/TestParallelANN.scala | 205 ++++--- 5 files changed, 252 insertions(+), 1116 deletions(-) delete mode 100644 mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedModel.scala delete mode 100644 mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index 392f6e458523f..6ecbb18202817 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -18,7 +18,6 @@ package org.apache.spark.mllib.ann import breeze.linalg.{DenseVector, Vector => BV, axpy => brzAxpy} - import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.mllib.optimization._ import org.apache.spark.rdd.RDD @@ -34,20 +33,15 @@ import org.apache.spark.util.random.XORShiftRandom * * NOTE: output values should be in the range [0,1] * - * For a network of H hidden layers: - * 
- * hiddenLayersTopology(h) indicates the number of nodes in hidden layer h, excluding the bias - * node. h counts from 0 (first hidden layer, taking inputs from input layer) to H - 1 (last - * hidden layer, sending outputs to the output layer). + * For a network of L layers: * - * hiddenLayersTopology is converted internally to topology, which adds the number of nodes - * in the input and output layers. + * topology( l ) indicates the number of nodes in layer l, excluding the bias node. * * noInput = topology(0), the number of input nodes * noOutput = topology(L-1), the number of output nodes * * input = data( 0 to noInput-1 ) - * output = data( noInput to noInput + noOutput - 1 ) + * output = data( noInput to noInput+noOutput-1 ) * * W_ijl is the weight from node i in layer l-1 to node j in layer l * W_ijl goes to position ofsWeight(l) + j*(topology(l-1)+1) + i in the weights vector @@ -74,73 +68,83 @@ import org.apache.spark.util.random.XORShiftRandom * */ -/** - * Artificial neural network (ANN) model - * - * @param weights the weights between the neurons in the ANN. - * @param topology array containing the number of nodes per layer in the network, including - * the nodes in the input and output layer, but excluding the bias nodes. - */ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topology: Array[Int]) - extends Serializable with ANNHelper { + extends Serializable { - /** - * Predicts values for a single data point using the trained model. - * - * @param testData represents a single data point. - * @return prediction using the trained model. - */ - def predict(testData: Vector): Vector = { - Vectors.dense(computeValues(testData.toArray, weights.toArray)) + private val L = topology.length - 1 + + private val ofsWeight: Array[Int] = { + val tmp = new Array[Int](L + 1) + var curPos = 0 + tmp(0) = 0 + for (l <- 1 to L) { + tmp(l) = curPos + curPos = curPos + (topology(l - 1) + 1) * (topology(l)) + } + tmp + } + + private def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) + + def computeValues(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { + var arrPrev = new Array[Double](topology(0)) + for (i <- 0 until topology(0)) + arrPrev(i) = arrData(i) + for (l <- 1 to L) { + val arrCur = new Array[Double](topology(l)) + for (j <- 0 until topology(l)) { + var cum: Double = 0.0 + for (i <- 0 until topology(l - 1)) + cum = cum + + arrPrev(i) * arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + i) + cum = cum + + arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + topology(l - 1)) // bias + arrCur(j) = g(cum) + } + arrPrev = arrCur + } + arrPrev + } + + def predictPoint(data: Vector, weights: Vector): Double = { + val outp = computeValues(data.toArray, weights.toArray) + outp(0) + } + + def predictPointV(data: Vector, weights: Vector): Vector = { + Vectors.dense(computeValues(data.toArray, weights.toArray)) } /** - * Predict values for an RDD of data points using the trained model. + * Predict values for a single data point using the model trained. + * + * @param testData array representing a single data point + * @return Vector prediction from the trained model * - * @param testDataRDD RDD representing the input vectors. - * @return RDD with predictions using the trained model as (input, output) pairs. + * Returns the complete vector. 
*/ - def predict(testDataRDD: RDD[Vector]): RDD[(Vector,Vector)] = { - testDataRDD.map(T => (T, predict(T)) ) - } - - private def computeValues(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { - val arrNodes = forwardRun(arrData, arrWeights) - arrNodes.slice(arrNodes.size - topology(L), arrNodes.size) + def predictV(testData: Vector): Vector = { + predictPointV(testData, weights) } } -/** - * Performs the training of an Artificial Neural Network (ANN) - * - * @param topology A vector containing the number of nodes per layer in the network, including - * the nodes in the input and output layer, but excluding the bias nodes. - * @param maxNumIterations The maximum number of iterations for the training phase. - * @param convergenceTol Convergence tolerance for LBFGS. Smaller value for closer convergence. - */ -class ArtificialNeuralNetwork private[mllib]( - topology: Array[Int], - maxNumIterations: Int, - convergenceTol: Double) +class ArtificialNeuralNetwork private( + private var topology: Array[Int], + private var numIterations: Int, + private var stepSize: Double, + private var miniBatchFraction: Double) extends Serializable { private val gradient = new ANNLeastSquaresGradient(topology) private val updater = new ANNUpdater() - private val optimizer = new LBFGS(gradient, updater). - setConvergenceTol(convergenceTol). - setMaxNumIterations(maxNumIterations) + private val optimizer = new GradientDescent(gradient, updater) + .setStepSize(stepSize) + .setNumIterations(numIterations) + .setMiniBatchFraction(miniBatchFraction) - /** - * Trains the ANN model. - * Uses default convergence tolerance 1e-4 for LBFGS. - * - * @param trainingRDD RDD containing (input, output) pairs for training. - * @param initialWeights the initial weights of the ANN - * @return ANN model. - */ - private def run(trainingRDD: RDD[(Vector, Vector)], initialWeights: Vector): - ArtificialNeuralNetworkModel = { - val data = trainingRDD.map(v => + private def run(input: RDD[(Vector, Vector)], initialWeights: Vector): + ArtificialNeuralNetworkModel = { + val data = input.map(v => (0.0, Vectors.fromBreeze(DenseVector.vertcat( v._1.toBreeze.toDenseVector, @@ -151,375 +155,194 @@ class ArtificialNeuralNetwork private[mllib]( } } -/** - * Top level methods for training the artificial neural network (ANN) - */ object ArtificialNeuralNetwork { - private val defaultTolerance: Double = 1e-4 - - /** - * Trains an ANN. - * Uses default convergence tolerance 1e-4 for LBFGS. - * - * @param trainingRDD RDD containing (input, output) pairs for training. - * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes. - * @param maxNumIterations specifies maximum number of training iterations. - * @return ANN model. - */ - def train( - trainingRDD: RDD[(Vector, Vector)], - hiddenLayersTopology: Array[Int], - maxNumIterations: Int): ArtificialNeuralNetworkModel = { - train(trainingRDD, hiddenLayersTopology, maxNumIterations, defaultTolerance) - } - - /** - * Continues training of an ANN. - * Uses default convergence tolerance 1e-4 for LBFGS. - * - * @param trainingRDD RDD containing (input, output) pairs for training. - * @param model model of an already partly trained ANN. - * @param maxNumIterations maximum number of training iterations. - * @return ANN model. 
- */ - def train( - trainingRDD: RDD[(Vector,Vector)], - model: ArtificialNeuralNetworkModel, - maxNumIterations: Int): ArtificialNeuralNetworkModel = { - train(trainingRDD, model, maxNumIterations, defaultTolerance) - } - - /** - * Trains an ANN with given initial weights. - * Uses default convergence tolerance 1e-4 for LBFGS. - * - * @param trainingRDD RDD containing (input, output) pairs for training. - * @param initialWeights initial weights vector. - * @param maxNumIterations maximum number of training iterations. - * @return ANN model. - */ def train( - trainingRDD: RDD[(Vector,Vector)], - hiddenLayersTopology: Array[Int], - initialWeights: Vector, - maxNumIterations: Int): ArtificialNeuralNetworkModel = { - train(trainingRDD, hiddenLayersTopology, initialWeights, maxNumIterations, defaultTolerance) + input: RDD[(Vector, Vector)], + topology: Array[Int], + initialWeights: Vector, + numIterations: Int, + stepSize: Double, + miniBatchFraction: Double): ArtificialNeuralNetworkModel = { + new ArtificialNeuralNetwork(topology, numIterations, stepSize, miniBatchFraction) + .run(input, initialWeights) } - /** - * Trains an ANN using customized convergence tolerance. - * - * @param trainingRDD RDD containing (input, output) pairs for training. - * @param model model of an already partly trained ANN. - * @param maxNumIterations maximum number of training iterations. - * @param convergenceTol convergence tolerance for LBFGS. Smaller value for closer convergence. - * @return ANN model. - */ def train( - trainingRDD: RDD[(Vector,Vector)], - model: ArtificialNeuralNetworkModel, - maxNumIterations: Int, - convergenceTol: Double): ArtificialNeuralNetworkModel = { - new ArtificialNeuralNetwork(model.topology, maxNumIterations, convergenceTol). - run(trainingRDD, model.weights) + input: RDD[(Vector, Vector)], + topology: Array[Int], + initialWeights: Vector, + numIterations: Int, + stepSize: Double): ArtificialNeuralNetworkModel = { + new ArtificialNeuralNetwork(topology, numIterations, stepSize, 1.0).run(input, initialWeights) } - /** - * Continues training of an ANN using customized convergence tolerance. - * - * @param trainingRDD RDD containing (input, output) pairs for training. - * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes. - * @param maxNumIterations maximum number of training iterations. - * @param convergenceTol convergence tolerance for LBFGS. Smaller value for closer convergence. - * @return ANN model. - */ def train( - trainingRDD: RDD[(Vector, Vector)], - hiddenLayersTopology: Array[Int], - maxNumIterations: Int, - convergenceTol: Double): ArtificialNeuralNetworkModel = { - val topology = convertTopology(trainingRDD, hiddenLayersTopology) - new ArtificialNeuralNetwork(topology, maxNumIterations, convergenceTol). - run(trainingRDD, randomWeights(topology, false)) + input: RDD[(Vector, Vector)], + topology: Array[Int], + numIterations: Int, + stepSize: Double, + miniBatchFraction: Double): ArtificialNeuralNetworkModel = { + new ArtificialNeuralNetwork(topology, numIterations, stepSize, miniBatchFraction) + .run(input, randomWeights(topology)) } - /** - * Trains an ANN with given initial weights. - * - * @param trainingRDD RDD containing (input, output) pairs for training. - * @param initialWeights initial weights vector. - * @param maxNumIterations maximum number of training iterations. - * @param convergenceTol convergence tolerance for LBFGS. Smaller value for closer convergence. - * @return ANN model. 
- */ def train( - trainingRDD: RDD[(Vector,Vector)], - hiddenLayersTopology: Array[Int], - initialWeights: Vector, - maxNumIterations: Int, - convergenceTol: Double): ArtificialNeuralNetworkModel = { - val topology = convertTopology(trainingRDD, hiddenLayersTopology) - new ArtificialNeuralNetwork(topology, maxNumIterations, convergenceTol). - run(trainingRDD, initialWeights) - } - - /** - * Provides a random weights vector. - * - * @param trainingRDD RDD containing (input, output) pairs for training. - * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes. - * @return random weights vector. - */ - def randomWeights( - trainingRDD: RDD[(Vector,Vector)], - hiddenLayersTopology: Array[Int]): Vector = { - val topology = convertTopology(trainingRDD, hiddenLayersTopology) - return randomWeights(topology, false) - } - - /** - * Provides a random weights vector, using given random seed. - * - * @param trainingRDD RDD containing (input, output) pairs for later training. - * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes. - * @param seed random generator seed. - * @return random weights vector. - */ - def randomWeights( - trainingRDD: RDD[(Vector,Vector)], - hiddenLayersTopology: Array[Int], - seed: Int): Vector = { - val topology = convertTopology(trainingRDD, hiddenLayersTopology) - return randomWeights(topology, true, seed) - } - - /** - * Provides a random weights vector, using given random seed. - * - * @param inputLayerSize size of input layer. - * @param outputLayerSize size of output layer. - * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes. - * @param seed random generator seed. - * @return random weights vector. - */ - def randomWeights( - inputLayerSize: Int, - outputLayerSize: Int, - hiddenLayersTopology: Array[Int], - seed: Int): Vector = { - val topology = inputLayerSize +: hiddenLayersTopology :+ outputLayerSize - return randomWeights(topology, true, seed) - } - - private def convertTopology( - input: RDD[(Vector,Vector)], - hiddenLayersTopology: Array[Int] ): Array[Int] = { - val firstElt = input.first - firstElt._1.size +: hiddenLayersTopology :+ firstElt._2.size + input: RDD[(Vector, Vector)], + topology: Array[Int], + numIterations: Int, + stepSize: Double): ArtificialNeuralNetworkModel = { + train(input, topology, numIterations, stepSize, 1.0) } - private def randomWeights(topology: Array[Int], useSeed: Boolean, seed: Int = 0): Vector = { - val rand: XORShiftRandom = - if( useSeed == false ) new XORShiftRandom() else new XORShiftRandom(seed) - var i: Int = 0 - var l: Int = 0 + def randomWeights(topology: Array[Int]): Vector = { + val rand = new XORShiftRandom() val noWeights = { var tmp = 0 - var i = 1 - while (i < topology.size) { + for (i <- 1 until topology.size) { tmp = tmp + topology(i) * (topology(i - 1) + 1) - i += 1 } tmp } + val initialWeightsArr = new Array[Double](noWeights) - var pos = 0 - l = 1 - while (l < topology.length) { - i = 0 - while (i < (topology(l) * (topology(l - 1) + 1))) { + var pos = 0; + for (l <- 1 until topology.length) { + for (i <- 0 until (topology(l) * (topology(l - 1) + 1))) { initialWeightsArr(pos) = (rand.nextDouble * 4.8 - 2.4) / (topology(l - 1) + 1) - pos += 1 - i += 1 + pos += 1; } - l += 1 } Vectors.dense(initialWeightsArr) } + } -/** - * Helper methods for ANN - */ -private[ann] trait ANNHelper { - protected val topology: Array[Int] - protected def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) - protected val L = 
topology.length - 1 - protected val noWeights = { +private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { + + private def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) + + private val L = topology.length - 1 + + private val noWeights = { var tmp = 0 - var l = 1 - while (l <= L) { - tmp = tmp + topology(l) * (topology(l - 1) + 1) - l += 1 + for (i <- 1 to L) { + tmp = tmp + topology(i) * (topology(i - 1) + 1) } tmp } - protected val ofsWeight: Array[Int] = { + + val ofsWeight: Array[Int] = { val tmp = new Array[Int](L + 1) - var curPos = 0 - tmp(0) = 0 - var l = 1 - while (l <= L) { + var curPos = 0; + tmp(0) = 0; + for (l <- 1 to L) { tmp(l) = curPos - curPos = curPos + (topology(l - 1) + 1) * topology(l) - l += 1 + curPos = curPos + (topology(l - 1) + 1) * (topology(l)) } tmp } - protected val noNodes: Int = { + + val noNodes: Int = { var tmp: Integer = 0 - var l = 0 - while (l < topology.size) { + for (l <- 0 until topology.size) { tmp = tmp + topology(l) - l += 1 } tmp } - protected val ofsNode: Array[Int] = { + + val ofsNode: Array[Int] = { val tmp = new Array[Int](L + 1) tmp(0) = 0 - var l = 1 - while (l <= L) { + for (l <- 1 to L) { tmp(l) = tmp(l - 1) + topology(l - 1) - l += 1 } tmp } - protected def forwardRun(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { + override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { + val arrData = data.toArray + val arrWeights = weights.toArray val arrNodes = new Array[Double](noNodes) - var i: Int = 0 - var j: Int = 0 - var l: Int = 0 - i = 0 - while (i < topology(0)) { + // forward run + for (i <- 0 until topology(0)) { arrNodes(i) = arrData(i) - i += 1 } - l = 1 - while (l <= L) { - j = 0 - while (j < topology(l)) { - var cum: Double = 0.0 - i = 0 - while (i < topology(l - 1)) { + for (l <- 1 to L) { + for (j <- 0 until topology(l)) { + var cum: Double = 0.0; + for (i <- 0 until topology(l - 1)) { cum = cum + arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + i) * arrNodes(ofsNode(l - 1) + i) - i += 1 } cum = cum + arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + topology(l - 1)) arrNodes(ofsNode(l) + j) = g(cum) - j += 1 } - l += 1 } - arrNodes - } -} - -private class ANNLeastSquaresGradient(val topology: Array[Int]) extends Gradient with ANNHelper { - - override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { - val arrData = data.toArray - val arrWeights = weights.toArray - var i: Int = 0 - var j: Int = 0 - var l: Int = 0 - // forward run - val arrNodes = forwardRun(arrData, arrWeights) val arrDiff = new Array[Double](topology(L)) - j = 0 - while (j < topology(L)) { - arrDiff(j) = arrNodes(ofsNode(L) + j) - arrData(topology(0) + j) - j += 1 + for (j <- 0 until topology(L)) { + arrDiff(j) = (arrNodes(ofsNode(L) + j) - arrData(topology(0) + j)) } - var err: Double = 0 - j = 0 - while (j < topology(L)) { + var err: Double = 0; + for (j <- 0 until topology(L)) { err = err + arrDiff(j) * arrDiff(j) - j += 1 } err = err * .5 // back propagation val arrDelta = new Array[Double](noNodes) - j = 0 - while (j < topology(L)) { + for (j <- 0 until topology(L)) { arrDelta(ofsNode(L) + j) = arrDiff(j) * arrNodes(ofsNode(L) + j) * (1 - arrNodes(ofsNode(L) + j)) - j += 1 } - l = L - 1 - while (l > 0) { - j = 0 - while (j < topology(l)) { + for (l <- L - 1 until 0 by -1) { + for (j <- 0 until topology(l)) { var cum: Double = 0.0 - i = 0 - while (i < topology(l + 1)) { + for (i <- 0 until topology(l + 1)) { cum = cum + arrWeights(ofsWeight(l + 1) + 
(topology(l) + 1) * i + j) * arrDelta(ofsNode(l + 1) + i) * arrNodes(ofsNode(l) + j) * (1 - arrNodes(ofsNode(l) + j)) - i += 1 } arrDelta(ofsNode(l) + j) = cum - j += 1 } - l -= 1 } // gradient val arrGrad = new Array[Double](noWeights) - l = 1 - while (l <= L) { - j = 0 - while (j < topology(l)) { - i = 0 - while (i < topology(l - 1)) { + for (l <- 1 to L) { + for (j <- 0 until topology(l)) { + for (i <- 0 until topology(l - 1)) { arrGrad(ofsWeight(l) + (topology(l - 1) + 1) * j + i) = arrNodes(ofsNode(l - 1) + i) * arrDelta(ofsNode(l) + j) - i += 1 } arrGrad(ofsWeight(l) + (topology(l - 1) + 1) * j + topology(l - 1)) = arrDelta(ofsNode(l) + j) - j += 1 } - l += 1 } (Vectors.dense(arrGrad), err) } override def compute( - data: Vector, - label: Double, - weights: Vector, - cumGradient: Vector): Double = { + data: Vector, + label: Double, + weights: Vector, + cumGradient: Vector): Double = { val (grad, err) = compute(data, label, weights) cumGradient.toBreeze += grad.toBreeze - err + return err } } private class ANNUpdater extends Updater { override def compute( - weightsOld: Vector, - gradient: Vector, - stepSize: Double, - iter: Int, - regParam: Double): (Vector, Double) = { + weightsOld: Vector, + gradient: Vector, + stepSize: Double, + iter: Int, + regParam: Double): (Vector, Double) = { val thisIterStepSize = stepSize val brzWeights: BV[Double] = weightsOld.toBreeze.toDenseVector brzAxpy(-thisIterStepSize, gradient.toBreeze, brzWeights) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedModel.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedModel.scala deleted file mode 100644 index ba3e31ae9d6da..0000000000000 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/GeneralizedModel.scala +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.mllib.ann - -import breeze.linalg.{DenseVector => BDV, SparseVector => BSV} -import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.Logging -import org.apache.spark.rdd.RDD -import org.apache.spark.mllib.optimization._ -import org.apache.spark.mllib.linalg.{Vectors, Vector} -import breeze.linalg.DenseVector -import breeze.linalg.{DenseVector => BDV} -import breeze.linalg.{SparseVector => BSV} - -/** - * :: DeveloperApi :: - * GeneralizedModel represents a model trained using - * GeneralizedAlgorithm. - * - * @param weights Weights computed for every feature. - */ -@DeveloperApi -abstract class GeneralizedModel(val weights: Vector ) - - extends Serializable { - - /** - * Predict the result given a data point and the weights learned. 
- * - * @param dataMatrix Row vector containing the features for this data point - * @param weightMatrix Column vector containing the weights of the model - * - * If the prediction model consists of a multi-dimensional vector, predictPoint - * returns only the first element of each vector. To get the whole vector, - * use predictPointV instead. - */ - protected def predictPoint( dataMatrix: Vector, weightMatrix: Vector ): Double - - /** - * Predict the result given a data point and the weights learned. - * - * @param dataMatrix Row vector containing the features for this data point - * @param weightMatrix Column vector containing the weights of the model - * - * Returns the complete output vector. - */ - protected def predictPointV( dataMatrix: Vector, weightsMatrix: Vector ): Vector - - /** - * Predict values for the given data set using the model trained. - * - * @param testData RDD representing data points to be predicted - * @return RDD[Double] where each entry contains the corresponding prediction - * - * Returns only first element of output vector. - */ - def predict( testData: RDD[Vector] ): RDD[Double] = { - - val localWeights = weights - testData.map(v => predictPoint(v, localWeights ) ) - - } - - /** - * Predict values for the given data set using the model trained. - * - * @param testData RDD representing data points to be predicted - * @return RDD[Vector] where each entry contains the corresponding prediction - * - * Returns the complete output vector. - */ - def predictV( testData: RDD[Vector] ): RDD[Vector] = { - - val localWeights = weights - testData.map( v => predictPointV( v, localWeights ) ) - - } - - /** - * Predict values for a single data point using the model trained. - * - * @param testData array representing a single data point - * @return Double prediction from the trained model - * - * Returns only first element of output vector. - */ - def predict( testData: Vector ): Double = { - - predictPoint( testData, weights ) - - } - - /** - * Predict values for a single data point using the model trained. - * - * @param testData array representing a single data point - * @return Vector prediction from the trained model - * - * Returns the complete vector. - */ - def predictV( testData: Vector ): Vector = { - - predictPointV( testData, weights ) - - } - -} - -/** - * :: DeveloperApi :: - * GeneralizedAlgorithm implements methods to train a function. - * This class should be extended with an Optimizer to create a new GM. - */ -@DeveloperApi -abstract class GeneralizedAlgorithm[M <: GeneralizedModel] - extends Logging with Serializable { - - /** The optimizer to solve the problem. */ - def optimizer: Optimizer - - /** - * Create a model given the weights - */ - protected def createModel(weights: Vector): M - - /** - * Run the algorithm with the configured parameters on an input RDD - * of (Vector,Vector) entries starting from the initial weights provided. 
- */ - def run(input: RDD[(Vector,Vector)], initialWeights: Vector): M = { - - val data = input.map( v => ( - (0.0).toDouble, - Vectors.fromBreeze( DenseVector.vertcat( - v._1.toBreeze.toDenseVector, - v._2.toBreeze.toDenseVector ) ) - ) ) - val weights = optimizer.optimize(data, initialWeights) - - createModel( weights ) - - } -} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala deleted file mode 100644 index a3a832844fb3e..0000000000000 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ParallelANN.scala +++ /dev/null @@ -1,521 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.mllib.ann - -import org.apache.spark.rdd.RDD -import org.apache.spark.mllib.linalg.Vector -import org.apache.spark.mllib.optimization._ -import org.apache.spark.mllib.linalg.Vectors -import breeze.linalg.DenseVector -import org.apache.spark.mllib.linalg.Vectors -import org.apache.spark.rdd.RDD -import breeze.linalg.{axpy => brzAxpy, Vector => BV} -import breeze.linalg.{Vector => BV} -import breeze.linalg.{axpy => brzAxpy} -import org.apache.spark.annotation.DeveloperApi -import org.apache.spark.mllib.regression.{LabeledPoint, RegressionModel} -import org.apache.spark.util.random.XORShiftRandom - -/* - * Implements a Artificial Neural Network (ANN) - * - * The data consists of an input vector and an output vector, combined into a single vector - * as follows: - * - * [ ---input--- ---output--- ] - * - * NOTE: output values should be in the range [0,1] - * - * For a network of L layers: - * - * topology( l ) indicates the number of nodes in layer l, excluding the bias node. 
- * - * noInput = topology(0), the number of input nodes - * noOutput = topology(L-1), the number of output nodes - * - * input = data( 0 to noInput-1 ) - * output = data( noInput to noInput+noOutput-1 ) - * - * W_ijl is the weight from node i in layer l-1 to node j in layer l - * W_ijl goes to position ofsWeight(l) + j*(topology(l-1)+1) + i in the weights vector - * - * B_jl is the bias input of node j in layer l - * B_jl goes to position ofsWeight(l) + j*(topology(l-1)+1) + topology(l-1) in the weights vector - * - * error function: E( O, Y ) = sum( O_j - Y_j ) - * (with O = (O_0, ..., O_(noOutput-1)) the output of the ANN, - * and (Y_0, ..., Y_(noOutput-1)) the input) - * - * node_jl is node j in layer l - * node_jl goes to position ofsNode(l) + j - * - * The weights gradient is defined as dE/dW_ijl and dE/dB_jl - * It has same mapping as W_ijl and B_jl - * - * For back propagation: - * delta_jl = dE/dS_jl, where S_jl the output of node_jl, but before applying the sigmoid - * delta_jl has the same mapping as node_jl - * - * Where E = ((estOutput-output),(estOutput-output)), - * the inner product of the difference between estimation and target output with itself. - * - */ - -class ParallelANNModel private[mllib] ( - val weights: Vector, - val topology: Array[Int] ) - extends Serializable { - - private val L = topology.length - 1 - - private val ofsWeight: Array[Int] = { - val tmp = new Array[Int](L + 1) - var curPos = 0; - tmp( 0 ) = 0; - for( l <- 1 to L ) { - tmp( l ) = curPos - curPos = curPos + ( topology( l - 1 ) + 1 ) * ( topology( l ) ) - } - tmp - } - - private def g( x: Double ) = 1.0 / (1.0 + math.exp( -x ) ) - - def computeValues( arrData: Array[Double], arrWeights: Array[Double] ): Array[Double] = { - - var arrPrev = new Array[Double]( topology( 0 ) ) - - for( i <- 0 until topology( 0 ) ) - arrPrev( i ) = arrData( i ) - - for( l <- 1 to L ) { - val arrCur = new Array[Double]( topology( l ) ) - for( j <- 0 until topology( l ) ) { - var cum: Double = 0.0 - for( i <-0 until topology( l-1 ) ) - cum = cum + - arrPrev( i ) * arrWeights( ofsWeight( l ) + ( topology( l-1 ) + 1 ) * j + i ) - cum = cum + - arrWeights( ofsWeight( l ) + ( topology( l-1 ) + 1 )*j + topology( l-1 ) ) // bias - arrCur( j ) = g( cum ) - } - arrPrev = arrCur; - } - - arrPrev - - } - - def predictPoint( data: Vector, weights: Vector ): Double = { - val outp = computeValues( data.toArray, weights.toArray ) - outp(0) - } - - def predictPointV( data: Vector, weights: Vector): Vector = { - Vectors.dense( computeValues( data.toArray, weights.toArray ) ) - } - - /** - * Predict values for a single data point using the model trained. - * - * @param testData array representing a single data point - * @return Vector prediction from the trained model - * - * Returns the complete vector. 
- */ - def predictV( testData: Vector ): Vector = { - - predictPointV( testData, weights ) - - } - - -} - -class ParallelANN( - private var topology: Array[Int], - private var numIterations: Int, - private var stepSize: Double, - private var miniBatchFraction: Double ) - extends Serializable { - - private val rand = new XORShiftRandom - - private val gradient = new LeastSquaresGradientANN( topology ) - private val updater = new ANNUpdater() - val optimizer = new GradientDescent(gradient, updater) - .setStepSize(stepSize) - .setNumIterations(numIterations) - .setMiniBatchFraction(miniBatchFraction) - - val noWeights = { - - var tmp = 0 - - for( i<-1 until topology.size ) { - tmp = tmp + topology(i) * (topology(i-1) + 1) - } - - tmp - - } - - def this( topology: Array[Int] ) = { - this( topology, 100, 1.0, 1.0 ) - } - - def this( noInput: Int, noHidden: Int, noOutput: Int ) = { - this( Array( noInput, noHidden, noOutput ) ) - } - - protected def createModel( weights: Vector ) = { - new ParallelANNModel( weights, topology ) - } - - def train( rdd: RDD[(Vector,Vector)] ): ParallelANNModel = { - - val ft = rdd.first() - - assert( topology( 0 ) == ft._1.size ) - assert( topology( topology.length-1 ) == ft._2.size ) - - val initialWeightsArr = new Array[Double](noWeights) - - var pos = 0; - - for( l <- 1 until topology.length ) { - for( i <- 0 until ( topology( l ) * ( topology( l - 1 ) + 1 ) ) ) { - initialWeightsArr( pos ) = ( rand.nextDouble * 4.8 - 2.4 ) / ( topology( l - 1 ) + 1) - pos = pos + 1; - } - } - - assert( pos == noWeights ) - - val initialWeights = Vectors.dense( initialWeightsArr ) - - run( rdd, initialWeights ) - - } - - def train( rdd: RDD[(Vector,Vector)], model: ParallelANNModel ): ParallelANNModel = { - - run( rdd, model.weights ) - - } - - def train( rdd: RDD[(Vector,Vector)], weights: Vector ): ParallelANNModel = { - - val ft = rdd.first() - assert( weights.size == noWeights ) - run( rdd, weights ); - - } - - private def run(input: RDD[(Vector,Vector)], initialWeights: Vector): ParallelANNModel = { - - val data = input.map( v => ( - (0.0).toDouble, - Vectors.fromBreeze( DenseVector.vertcat( - v._1.toBreeze.toDenseVector, - v._2.toBreeze.toDenseVector ) ) - ) ) - val weights = optimizer.optimize(data, initialWeights) - createModel( weights ) - } - -} - -object ParallelANN { - - def train( - input: RDD[(Vector,Vector)], - numIterations: Int, - stepSize: Double, - regParam: Double, - miniBatchFraction: Double, - initialWeights: Vector): ParallelANNModel = { - null - } - -} - - -class LeastSquaresGradientANN( - topology: Array[Int] ) - extends Gradient { - - private def g( x: Double ) = 1.0 / (1.0 + math.exp( -x ) ) - - private val L = topology.length - 1 - - private val noWeights = { - var tmp = 0 - for( i<-1 to L ) { - tmp = tmp + topology(i) * ( topology( i - 1 ) + 1 ) - } - tmp - } - - val ofsWeight: Array[Int] = { - - var tmp = new Array[Int]( L + 1 ) - var curPos = 0; - - tmp( 0 ) = 0; - for( l <- 1 to L ) { - tmp( l ) = curPos - curPos = curPos + ( topology( l - 1 ) + 1 ) * ( topology( l ) ) - } - - tmp - - } - - val noNodes: Int = { - - var tmp: Integer = 0 - - for( l <-0 until topology.size ) { - tmp = tmp + topology( l ) - } - - tmp - - } - - val ofsNode: Array[Int] = { - - var tmp = new Array[Int]( L + 1 ) - tmp( 0 ) = 0 - - for( l <-1 to L ) { - tmp( l ) = tmp( l - 1 ) + topology( l - 1 ) - } - - tmp - - } - - /* For verification only - def calcErr( arrData: Array[Double], arrWeights: Array[Double] ): Double = { - - var arrPrev = new Array[Double]( topology( 0 ) ) 
- - for( i <- 0 until topology( 0 ) ) - arrPrev( i ) = arrData( i ) - - for( l <- 1 to L ) { - val arrCur = new Array[Double]( topology( l ) ) - for( j <- 0 until topology( l ) ) { - var cum: Double = 0.0 - for( i <-0 until topology( l-1 ) ) { - cum = cum + - arrPrev( i ) * arrWeights( ofsWeight( l ) + ( topology( l-1 ) + 1 ) * j + i ) - } - cum = cum + - arrWeights( ofsWeight( l ) + ( topology( l-1 ) + 1 )*j + topology( l-1 ) ) // bias - arrCur( j ) = g( cum ) - } - arrPrev = arrCur; - } - - val arrDiff = new Array[Double]( topology( L ) ) - for( j <- 0 until topology( L ) ) { - arrDiff( j ) = ( arrPrev( j ) - arrData( topology(0) + j ) ) - } - - var err: Double = 0; - for( j <-0 until topology( L ) ) { - err = err + arrDiff( j )*arrDiff( j ) - } - - err*.5 - } - */ - - override def compute( data: Vector, label: Double, weights: Vector ): ( Vector, Double ) = { - - val arrData = data.toArray - val arrWeights = weights.toArray - val arrNodes = new Array[Double]( noNodes ) - - /* - * nodes - */ - - for( i <- 0 until topology( 0 ) ) { - arrNodes( i ) = arrData( i ) - } - - for( l <- 1 to L ) { - for( j <- 0 until topology( l ) ) { - var cum: Double = 0.0; - for( i <- 0 until topology( l-1 ) ) { - cum = cum + - arrWeights( ofsWeight( l ) + ( topology( l-1 ) + 1 ) * j + i ) * - arrNodes( ofsNode( l-1 ) + i ) - } - cum = cum + arrWeights( ofsWeight( l ) + ( topology( l-1 ) + 1 )*j + topology( l-1 ) ) - arrNodes( ofsNode( l ) + j ) = g( cum ) - } - } - - val arrDiff = new Array[Double]( topology( L ) ) - for( j <- 0 until topology( L ) ) { - arrDiff( j ) = ( arrNodes( ofsNode( L ) + j ) - arrData( topology(0) + j ) ) - } - - var err: Double = 0; - for( j <-0 until topology( L ) ) { - err = err + arrDiff( j )*arrDiff( j ) - } - err = err*.5 - - /* - * back propagation - */ - - val arrDelta = new Array[Double]( noNodes ) - - for( j <- 0 until topology( L ) ) { - arrDelta( ofsNode( L ) + j ) = - arrDiff( j ) * - arrNodes( ofsNode( L ) + j ) * ( 1 - arrNodes( ofsNode( L ) + j ) ) - } - - for( l <- L-1 until 0 by -1 ) { - for( j <- 0 until topology( l ) ) { - var cum: Double = 0.0 - for( i <- 0 until topology( l + 1 ) ) { - cum = cum + - arrWeights( ofsWeight( l + 1 ) + ( topology( l ) + 1 ) * i + j ) * - arrDelta( ofsNode( l + 1 ) + i ) * - arrNodes( ofsNode( l ) + j ) * ( 1 - arrNodes( ofsNode( l ) + j ) ) - } - arrDelta( ofsNode( l ) + j ) = cum - } - } - - /* - * gradient - */ - - /* for verification only - val arrWcopy = new Array[Double]( noWeights ) - Array.copy(arrWeights, 0, arrWcopy, 0, noWeights ) - val eps = 0.000001 - val errGradAccept = 5e-6 - */ - - val arrGrad = new Array[Double]( noWeights ) - - for( l <- 1 to L ) { - for( j <-0 until topology( l ) ) { - for( i <- 0 until topology( l-1 ) ) { - arrGrad( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + i ) = - arrNodes( ofsNode( l - 1 ) + i ) * - arrDelta( ofsNode( l ) + j ) - - /* for verification only - val tmpErr0 = calcErr( arrData, arrWcopy ) - arrWcopy( ofsWeight( l ) + ( topology( l -1 ) + 1 ) * j + i ) = - arrWcopy( ofsWeight( l ) + ( topology( l -1 ) + 1 ) * j + i ) + eps - val tmpErr1 = calcErr( arrData, arrWcopy ) - arrWcopy( ofsWeight( l ) + ( topology( l -1 ) + 1 ) * j + i ) = - arrWcopy( ofsWeight( l ) + ( topology( l -1 ) + 1 ) * j + i ) - eps - val dE = ( tmpErr1 - tmpErr0 ) / eps - - val errGrad = - math.abs( dE - arrGrad( ofsWeight( l ) + - ( topology( l - 1 ) + 1 ) * j + i ) ) - - try { - assert( errGrad < errGradAccept ) - } - catch { - case e: AssertionError => - println( (dE, arrGrad( ofsWeight( l ) + - ( 
topology( l - 1 ) + 1 ) * j + i ), errGrad ) ) - } - */ - } - - arrGrad( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ) = - arrDelta( ofsNode( l ) + j ) - - /* for verification only - val tmpErr0 = calcErr( arrData, arrWcopy ) - arrWcopy( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ) = - arrWcopy( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ) + eps - val tmpErr1 = calcErr( arrData, arrWcopy ) - arrWcopy( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ) = - arrWcopy( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ) - eps - val dE = ( tmpErr1 - tmpErr0 ) / eps - - val errGrad = math.abs( dE - - arrGrad( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ) ) - - try { - assert( errGrad < errGradAccept ) - } - catch { - case e: AssertionError => - println( (dE, - arrGrad( ofsWeight( l ) + ( topology( l - 1 ) + 1 ) * j + topology( l-1 ) ), - errGrad ) ) - } - */ - - } - } - - ( Vectors.dense( arrGrad ), err ) - - } - - override def compute( - data: Vector, - label: Double, - weights: Vector, - cumGradient: Vector): Double = { - - val (grad, err) = compute( data, label, weights ) - cumGradient.toBreeze += grad.toBreeze - return err - - } -} - -class ANNUpdater extends Updater { - - override def compute( - weightsOld: Vector, - gradient: Vector, - stepSize: Double, - iter: Int, - regParam: Double): (Vector, Double) = { - - val thisIterStepSize = stepSize - val brzWeights: BV[Double] = weightsOld.toBreeze.toDenseVector - brzAxpy(-thisIterStepSize, gradient.toBreeze, brzWeights) - (Vectors.fromBreeze(brzWeights), 0) - - } - -} diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala index ff6eb51377d78..30f51a973712f 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala @@ -19,9 +19,8 @@ class ANNSuite extends FunSuite with LocalSparkContext { val data = inputs.zip(outputs).map { case(features, label) => (Vectors.dense(features), Vectors.dense(Array(label)))} val rddData = sc.parallelize(data, 2) - val ann = new ParallelANN(Array[Int](inputSize, hiddenSize, outputSize)) - ann.optimizer.setNumIterations(2000).setStepSize(2.0) - val model = ann.train(rddData) + val topology = Array[Int](inputSize, hiddenSize, outputSize) + val model = ArtificialNeuralNetwork.train(rddData, topology, 2000, 2.0, 1.0) val predictionAndLabels = rddData.map { case(input, label) => (model.predictV(input)(0), label(0)) }.collect() assert(predictionAndLabels.forall { case(p, l) => (math.round(p) - l) == 0 }) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala index a097df0c5f520..b528da19dbfb9 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala @@ -18,27 +18,27 @@ package org.apache.spark.mllib.ann +import java.text.SimpleDateFormat +import java.util.Calendar + import org.apache.spark._ -import org.apache.spark.mllib.regression._ import org.apache.spark.mllib.linalg._ -import org.apache.spark.mllib.ann._ + import scala.util.Random -import java.util.Calendar -import java.text.SimpleDateFormat object TestParallelANN { - var rand = new Random( 0 ) + var rand = new Random(0) - def generateInput2D( f: Double => Double, xmin: Double, xmax: 
Double, noPoints: Int ): Array[(Vector,Vector)] = - { + def generateInput2D(f: Double => Double, xmin: Double, xmax: Double, noPoints: Int): + Array[(Vector, Vector)] = { - var out = new Array[(Vector,Vector)](noPoints) + var out = new Array[(Vector, Vector)](noPoints) - for( i <- 0 to noPoints - 1 ) { - val x = xmin + rand.nextDouble()*(xmax - xmin) + for (i <- 0 to noPoints - 1) { + val x = xmin + rand.nextDouble() * (xmax - xmin) val y = f(x) - out(i) = ( Vectors.dense( x ), Vectors.dense( y ) ) + out(i) = (Vectors.dense(x), Vectors.dense(y)) } return out @@ -46,21 +46,21 @@ object TestParallelANN { } - def generateInput3D( f: (Double,Double) => Double, xmin: Double, xmax: Double, ymin: Double, ymax: Double, noPoints: Int ): Array[(Vector,Vector)] = { + def generateInput3D(f: (Double, Double) => Double, xmin: Double, xmax: Double, ymin: Double, ymax: Double, noPoints: Int): Array[(Vector, Vector)] = { - var out = new Array[(Vector,Vector)](noPoints) + var out = new Array[(Vector, Vector)](noPoints) - for( i <- 0 to noPoints - 1 ) { + for (i <- 0 to noPoints - 1) { - val x = xmin + rand.nextDouble()*(xmax - xmin) - val y = ymin + rand.nextDouble()*(ymax - ymin) - val z = f( x, y ) + val x = xmin + rand.nextDouble() * (xmax - xmin) + val y = ymin + rand.nextDouble() * (ymax - ymin) + val z = f(x, y) var arr = new Array[Double](2) arr(0) = x arr(1) = y - out(i) = ( Vectors.dense( arr ), Vectors.dense( z ) ) + out(i) = (Vectors.dense(arr), Vectors.dense(z)) } @@ -68,13 +68,13 @@ object TestParallelANN { } - def generateInput4D( f: Double => (Double,Double,Double), tmin: Double, tmax: Double, noPoints: Int ): Array[(Vector,Vector)] = { + def generateInput4D(f: Double => (Double, Double, Double), tmin: Double, tmax: Double, noPoints: Int): Array[(Vector, Vector)] = { - var out = new Array[(Vector,Vector)](noPoints) + var out = new Array[(Vector, Vector)](noPoints) - for( i <- 0 to noPoints - 1 ) { + for (i <- 0 to noPoints - 1) { - val t: Double = tmin + rand.nextDouble()*(tmax - tmin) + val t: Double = tmin + rand.nextDouble() * (tmax - tmin) var arr = new Array[Double](3) var F = f(t) @@ -82,59 +82,59 @@ object TestParallelANN { arr(1) = F._2 arr(2) = F._3 - out(i) = ( Vectors.dense( t ), Vectors.dense( arr ) ) + out(i) = (Vectors.dense(t), Vectors.dense(arr)) } out } - def f( T: Double ): Double = { - val y = 0.5 + Math.abs(T/5).toInt.toDouble*.15 + math.sin(T*math.Pi/10)*.1 - assert( y<= 1) + def f(T: Double): Double = { + val y = 0.5 + Math.abs(T / 5).toInt.toDouble * .15 + math.sin(T * math.Pi / 10) * .1 + assert(y <= 1) y } - def f3D( x: Double, y: Double ): Double = { - .5 + .24*Math.sin( x*2*math.Pi/10 ) + .24*Math.cos( y*2*math.Pi/10 ) + def f3D(x: Double, y: Double): Double = { + .5 +.24 * Math.sin(x * 2 * math.Pi / 10) +.24 * Math.cos(y * 2 * math.Pi / 10) } - def f4D( t: Double ): (Double, Double,Double) = { - val x = Math.abs(.8*Math.cos( t*2*math.Pi/20 ) ) + .1 - val y = (11 + t)/22 - val z = .5 + .35*Math.sin(t*2*math.Pi/5)*Math.cos( t*2*math.Pi/10 ) + .15*t/11 - ( x, y, z ) + def f4D(t: Double): (Double, Double, Double) = { + val x = Math.abs(.8 * Math.cos(t * 2 * math.Pi / 20)) + .1 + val y = (11 + t) / 22 + val z =.5 +.35 * Math.sin(t * 2 * math.Pi / 5) * Math.cos(t * 2 * math.Pi / 10) +.15 * t / 11 + (x, y, z) } - def concat( v1: Vector, v2: Vector ): Vector = { + def concat(v1: Vector, v2: Vector): Vector = { var a1 = v1.toArray var a2 = v2.toArray - var a3 = new Array[Double]( a1.size + a2.size ) + var a3 = new Array[Double](a1.size + a2.size) - for( i <- 0 to a1.size - 
1 ) { + for (i <- 0 to a1.size - 1) { a3(i) = a1(i) } - for( i <- 0 to a2.size - 1 ) { + for (i <- 0 to a2.size - 1) { a3(i + a1.size) = a2(i) } - Vectors.dense( a3 ) + Vectors.dense(a3) } - def main( arg: Array[String] ) { + def main(arg: Array[String]) { - println( "Parallel ANN tester" ) + println("Parallel ANN tester") println - val formatter = new SimpleDateFormat("hh:mm:ss") + val formatter = new SimpleDateFormat("hh:mm:ss") var curAngle: Double = 0.0 var graphic: Boolean = false - if( (arg.length>0) && (arg(0)=="graph" ) ) { + if ((arg.length > 0) && (arg(0) == "graph")) { graphic = true } @@ -142,15 +142,15 @@ object TestParallelANN { var outputFrame3D: OutputFrame3D = null var outputFrame4D: OutputFrame3D = null - if( graphic ) { + if (graphic) { - outputFrame2D = new OutputFrame2D( "x -> y" ) + outputFrame2D = new OutputFrame2D("x -> y") outputFrame2D.apply - outputFrame3D = new OutputFrame3D( "(x,y) -> z", 1 ) + outputFrame3D = new OutputFrame3D("(x,y) -> z", 1) outputFrame3D.apply - outputFrame4D = new OutputFrame3D( "t -> (x,y,z)" ) + outputFrame4D = new OutputFrame3D("t -> (x,y,z)") outputFrame4D.apply } @@ -161,102 +161,95 @@ object TestParallelANN { var conf = new SparkConf().setAppName("Parallel ANN").setMaster("local[1]") var sc = new SparkContext(conf) - val testRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache - val testRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 200 ), 2).cache - val testRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache - - val validationRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache - val validationRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache - val validationRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache - - if( graphic ) { + val testRDD2D = sc.parallelize(generateInput2D(T => f(T), -10, 10, 100), 2).cache + val testRDD3D = sc.parallelize(generateInput3D((x, y) => f3D(x, y), -10, 10, -10, 10, 200), 2).cache + val testRDD4D = sc.parallelize(generateInput4D(t => f4D(t), -10, 10, 100), 2).cache - outputFrame2D.setData( testRDD2D.map( T => concat( T._1, T._2 ) ) ) - outputFrame3D.setData( testRDD3D.map( T => concat( T._1, T._2 ) ) ) - outputFrame4D.setData( testRDD4D.map( T => T._2 ) ) + val validationRDD2D = sc.parallelize(generateInput2D(T => f(T), -10, 10, 100), 2).cache + val validationRDD3D = sc.parallelize(generateInput3D((x, y) => f3D(x, y), -10, 10, -10, 10, 100), 2).cache + val validationRDD4D = sc.parallelize(generateInput4D(t => f4D(t), -10, 10, 100), 2).cache - } + if (graphic) { - val parallelANN2D = new ParallelANN( Array[Int]( 1, 3, 3, 1 ) ) - parallelANN2D.optimizer.setNumIterations(1000).setStepSize( 1.0 ) + outputFrame2D.setData(testRDD2D.map(T => concat(T._1, T._2))) + outputFrame3D.setData(testRDD3D.map(T => concat(T._1, T._2))) + outputFrame4D.setData(testRDD4D.map(T => T._2)) - val parallelANN3D = new ParallelANN( Array[Int]( 2, 20, 1 ) ) - parallelANN3D.optimizer.setNumIterations(1000).setStepSize( 1.0 ) + } - val parallelANN4D = new ParallelANN( Array[Int]( 1, 20, 3 ) ) - parallelANN4D.optimizer.setNumIterations( 1000 ).setStepSize( 1.0 ) - val starttime = Calendar.getInstance().getTime() - println( "Start training " + starttime ) + println("Start training " + starttime) - var model2D = parallelANN2D.train( testRDD2D ) - var model3D = parallelANN3D.train( testRDD3D ) - var model4D = parallelANN4D.train( testRDD4D ) + val 
numIterations = 1000 + val stepSize = 1.0 + var model2D = ArtificialNeuralNetwork.train(testRDD2D, Array[Int](1, 3, 3, 1), numIterations, stepSize) + var model3D = ArtificialNeuralNetwork.train(testRDD3D, Array[Int](2, 20, 1), numIterations, stepSize) + var model4D = ArtificialNeuralNetwork.train(testRDD4D, Array[Int](1, 20, 3), numIterations, stepSize) val noIt = 1500 - var errHist = new Array[(Int,Double,Double,Double)]( noIt ) + var errHist = new Array[(Int, Double, Double, Double)](noIt) - for( i <- 0 to noIt - 1 ) { + for (i <- 0 to noIt - 1) { - val predictedAndTarget2D = validationRDD2D.map( T => ( T._1, T._2, model2D.predictV( T._1 ) ) ) - val predictedAndTarget3D = validationRDD3D.map( T => ( T._1, T._2, model3D.predictV( T._1 ) ) ) - val predictedAndTarget4D = validationRDD4D.map( T => ( T._1, T._2, model4D.predictV( T._1 ) ) ) + val predictedAndTarget2D = validationRDD2D.map(T => (T._1, T._2, model2D.predictV(T._1))) + val predictedAndTarget3D = validationRDD3D.map(T => (T._1, T._2, model3D.predictV(T._1))) + val predictedAndTarget4D = validationRDD4D.map(T => (T._1, T._2, model4D.predictV(T._1))) - var err2D = predictedAndTarget2D.map( T => - (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) - ).reduce( (u,v) => u + v ) + var err2D = predictedAndTarget2D.map(T => + (T._3.toArray(0) - T._2.toArray(0)) * (T._3.toArray(0) - T._2.toArray(0)) + ).reduce((u, v) => u + v) - var err3D = predictedAndTarget3D.map( T => - (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) - ).reduce( (u,v) => u + v ) + var err3D = predictedAndTarget3D.map(T => + (T._3.toArray(0) - T._2.toArray(0)) * (T._3.toArray(0) - T._2.toArray(0)) + ).reduce((u, v) => u + v) - var err4D = predictedAndTarget4D.map( T => { + var err4D = predictedAndTarget4D.map(T => { val v1 = T._2.toArray val v2 = T._3.toArray - (v1(0) - v2(0))*(v1(0) - v2(0)) + - (v1(1) - v2(1))*(v1(1) - v2(1)) + - (v1(2) - v2(2))*(v1(2) - v2(2)) + (v1(0) - v2(0)) * (v1(0) - v2(0)) + + (v1(1) - v2(1)) * (v1(1) - v2(1)) + + (v1(2) - v2(2)) * (v1(2) - v2(2)) - } ).reduce( (u,v) => u + v ) + }).reduce((u, v) => u + v) - if( graphic ) { + if (graphic) { val predicted2D = predictedAndTarget2D.map( - T => concat( T._1, T._3 ) + T => concat(T._1, T._3) ) val predicted3D = predictedAndTarget3D.map( - T => concat( T._1, T._3 ) + T => concat(T._1, T._3) ) val predicted4D = predictedAndTarget4D.map( T => T._3 ) - curAngle = curAngle + math.Pi/4 - if( curAngle>=2*math.Pi ) { - curAngle = curAngle - 2*math.Pi + curAngle = curAngle + math.Pi / 4 + if (curAngle >= 2 * math.Pi) { + curAngle = curAngle - 2 * math.Pi } - outputFrame3D.setAngle( curAngle ) - outputFrame4D.setAngle( curAngle ) + outputFrame3D.setAngle(curAngle) + outputFrame4D.setAngle(curAngle) - outputFrame2D.setApproxPoints( predicted2D ) - outputFrame3D.setApproxPoints( predicted3D ) - outputFrame4D.setApproxPoints( predicted4D ) + outputFrame2D.setApproxPoints(predicted2D) + outputFrame3D.setApproxPoints(predicted3D) + outputFrame4D.setApproxPoints(predicted4D) } - println( "It. "+i+" ("+Calendar.getInstance().getTime()+"), Error 2D/3D/4D: " + (err2D, err3D, err4D) ) - errHist(i) = ( i, err2D, err3D, err4D ) + println("It. 
" + i + " (" + Calendar.getInstance().getTime() + "), Error 2D/3D/4D: " +(err2D, err3D, err4D)) + errHist(i) = (i, err2D, err3D, err4D) - if( i < noIt - 1 ) { - model2D = parallelANN2D.train( testRDD2D, model2D ) - model3D = parallelANN3D.train( testRDD3D, model3D ) - model4D = parallelANN4D.train( testRDD4D, model4D ) + if (i < noIt - 1) { + model2D = ArtificialNeuralNetwork.train(testRDD2D, model2D.topology, model2D.weights, numIterations, stepSize) + model3D = ArtificialNeuralNetwork.train(testRDD3D, model3D.topology, model3D.weights, numIterations, stepSize) + model4D = ArtificialNeuralNetwork.train(testRDD4D, model4D.topology, model4D.weights, numIterations, stepSize) } } @@ -265,11 +258,11 @@ object TestParallelANN { val stoptime = Calendar.getInstance().getTime() - for( i <- 0 to noIt - 1 ) { - println( errHist(i) ) + for (i <- 0 to noIt - 1) { + println(errHist(i)) } - println( formatter.format( starttime )+"-" + formatter.format( stoptime ) + " "+(stoptime.getTime-starttime.getTime+500)/1000+" seconds" ) + println(formatter.format(starttime) + "-" + formatter.format(stoptime) + " " + (stoptime.getTime - starttime.getTime + 500) / 1000 + " seconds") } From 181c29b5ed678eebe6fdc6bf8227a602a7e153d0 Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Tue, 9 Sep 2014 19:02:20 +0400 Subject: [PATCH 099/143] Apache header --- .../org/apache/spark/mllib/ann/ANNSuite.scala | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala index 30f51a973712f..692366699a5fb 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala @@ -1,3 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + package org.apache.spark.mllib.ann import org.apache.spark.mllib.linalg.Vectors From 7ac9a67f61bf1ef0733c9c7feadbf3f579e8bfaa Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Wed, 10 Sep 2014 13:58:59 +0800 Subject: [PATCH 100/143] Update ArtificialNeuralNetwork.scala Replaced fors by whiles --- .../mllib/ann/ArtificialNeuralNetwork.scala | 194 ++++++++++++------ 1 file changed, 135 insertions(+), 59 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index 6ecbb18202817..9294c46727f5c 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -77,9 +77,11 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol val tmp = new Array[Int](L + 1) var curPos = 0 tmp(0) = 0 - for (l <- 1 to L) { + var l = 1 + while(l <= L) { tmp(l) = curPos curPos = curPos + (topology(l - 1) + 1) * (topology(l)) + l = l + 1 } tmp } @@ -88,20 +90,33 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol def computeValues(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { var arrPrev = new Array[Double](topology(0)) - for (i <- 0 until topology(0)) + var i: Int = 0 + var j: Int = 0 + var l: Int = 0 + i = 0 + while(i < topology(0)) { arrPrev(i) = arrData(i) - for (l <- 1 to L) { + i = i + 1 + } + l = 1 + while(l <= L) { val arrCur = new Array[Double](topology(l)) - for (j <- 0 until topology(l)) { + j = 0 + while(j < topology(l)) { var cum: Double = 0.0 - for (i <- 0 until topology(l - 1)) + i = 0 + while( i < topology(l - 1) ) { cum = cum + arrPrev(i) * arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + i) + i = i + 1 + } cum = cum + arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + topology(l - 1)) // bias arrCur(j) = g(cum) + j = j + 1 } arrPrev = arrCur + l = l + 1 } arrPrev } @@ -126,13 +141,14 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol def predictV(testData: Vector): Vector = { predictPointV(testData, weights) } + } class ArtificialNeuralNetwork private( - private var topology: Array[Int], - private var numIterations: Int, - private var stepSize: Double, - private var miniBatchFraction: Double) + private var topology: Array[Int], + private var numIterations: Int, + private var stepSize: Double, + private var miniBatchFraction: Double) extends Serializable { private val gradient = new ANNLeastSquaresGradient(topology) @@ -158,60 +174,80 @@ class ArtificialNeuralNetwork private( object ArtificialNeuralNetwork { def train( - input: RDD[(Vector, Vector)], - topology: Array[Int], - initialWeights: Vector, - numIterations: Int, - stepSize: Double, - miniBatchFraction: Double): ArtificialNeuralNetworkModel = { + input: RDD[(Vector, Vector)], + topology: Array[Int], + initialWeights: Vector, + numIterations: Int, + stepSize: Double, + miniBatchFraction: Double): ArtificialNeuralNetworkModel = { new ArtificialNeuralNetwork(topology, numIterations, stepSize, miniBatchFraction) .run(input, initialWeights) } def train( - input: RDD[(Vector, Vector)], - topology: Array[Int], - initialWeights: Vector, - numIterations: Int, - stepSize: Double): ArtificialNeuralNetworkModel = { + input: RDD[(Vector, Vector)], + topology: Array[Int], + initialWeights: Vector, + numIterations: Int, + stepSize: Double): ArtificialNeuralNetworkModel = { new 
ArtificialNeuralNetwork(topology, numIterations, stepSize, 1.0).run(input, initialWeights) } def train( - input: RDD[(Vector, Vector)], - topology: Array[Int], - numIterations: Int, - stepSize: Double, - miniBatchFraction: Double): ArtificialNeuralNetworkModel = { + input: RDD[(Vector,Vector)], + model: ArtificialNeuralNetworkModel, + numIterations: Int, + stepSize: Double): ArtificialNeuralNetworkModel = { + train(input, model.topology, model.weights, numIterations, stepSize) + } + + + def train( + input: RDD[(Vector, Vector)], + topology: Array[Int], + numIterations: Int, + stepSize: Double, + miniBatchFraction: Double): ArtificialNeuralNetworkModel = { new ArtificialNeuralNetwork(topology, numIterations, stepSize, miniBatchFraction) .run(input, randomWeights(topology)) } def train( - input: RDD[(Vector, Vector)], - topology: Array[Int], - numIterations: Int, - stepSize: Double): ArtificialNeuralNetworkModel = { + input: RDD[(Vector, Vector)], + topology: Array[Int], + numIterations: Int, + stepSize: Double): ArtificialNeuralNetworkModel = { train(input, topology, numIterations, stepSize, 1.0) } def randomWeights(topology: Array[Int]): Vector = { val rand = new XORShiftRandom() + + var i: Int = 0 + var l: Int = 0 + val noWeights = { var tmp = 0 - for (i <- 1 until topology.size) { + var i = 1 + while(i < topology.size) { tmp = tmp + topology(i) * (topology(i - 1) + 1) + i = i + 1 } tmp } val initialWeightsArr = new Array[Double](noWeights) var pos = 0; - for (l <- 1 until topology.length) { - for (i <- 0 until (topology(l) * (topology(l - 1) + 1))) { + + l = 1 + while( l < topology.length) { + i = 0 + while(i < (topology(l) * (topology(l - 1) + 1))) { initialWeightsArr(pos) = (rand.nextDouble * 4.8 - 2.4) / (topology(l - 1) + 1) pos += 1; + i += 1 } + l += 1 } Vectors.dense(initialWeightsArr) } @@ -226,8 +262,10 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { private val noWeights = { var tmp = 0 - for (i <- 1 to L) { - tmp = tmp + topology(i) * (topology(i - 1) + 1) + var l = 1 + while(l <= L) { + tmp = tmp + topology(l) * (topology(l - 1) + 1) + l += 1 } tmp } @@ -236,17 +274,21 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { val tmp = new Array[Int](L + 1) var curPos = 0; tmp(0) = 0; - for (l <- 1 to L) { + var l = 1 + while(l <= L) { tmp(l) = curPos curPos = curPos + (topology(l - 1) + 1) * (topology(l)) + l += 1 } tmp } val noNodes: Int = { var tmp: Integer = 0 - for (l <- 0 until topology.size) { + var l = 0 + while(l < topology.size) { tmp = tmp + topology(l) + l += 1 } tmp } @@ -254,8 +296,10 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { val ofsNode: Array[Int] = { val tmp = new Array[Int](L + 1) tmp(0) = 0 - for (l <- 1 to L) { + var l = 1 + while(l <= L) { tmp(l) = tmp(l - 1) + topology(l - 1) + l += 1 } tmp } @@ -264,71 +308,102 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { val arrData = data.toArray val arrWeights = weights.toArray val arrNodes = new Array[Double](noNodes) + + var i: Int = 0 + var j: Int = 0 + var l: Int = 0 + // forward run - for (i <- 0 until topology(0)) { + i = 0; + while(i < topology(0)) { arrNodes(i) = arrData(i) + i += 1 } - for (l <- 1 to L) { - for (j <- 0 until topology(l)) { + l = 1 + while( l <= L ) { + j = 0 + while(j < topology(l)) { var cum: Double = 0.0; - for (i <- 0 until topology(l - 1)) { + i = 0 + while(i < topology(l - 1)) { cum = cum + arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + i) * 
arrNodes(ofsNode(l - 1) + i) + i += 1 } cum = cum + arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + topology(l - 1)) arrNodes(ofsNode(l) + j) = g(cum) + j += 1 } + l += 1 } val arrDiff = new Array[Double](topology(L)) - for (j <- 0 until topology(L)) { + j = 0 + while( j < topology(L)) { arrDiff(j) = (arrNodes(ofsNode(L) + j) - arrData(topology(0) + j)) + j += 1 } var err: Double = 0; - for (j <- 0 until topology(L)) { + j = 0 + while(j < topology(L)) { err = err + arrDiff(j) * arrDiff(j) + j += 1 } err = err * .5 // back propagation val arrDelta = new Array[Double](noNodes) - for (j <- 0 until topology(L)) { + j = 0 + while(j < topology(L)) { arrDelta(ofsNode(L) + j) = arrDiff(j) * arrNodes(ofsNode(L) + j) * (1 - arrNodes(ofsNode(L) + j)) + j += 1 } - for (l <- L - 1 until 0 by -1) { - for (j <- 0 until topology(l)) { + l = L - 1 + while(l > 0) { + j = 0 + while(j < topology(l)) { var cum: Double = 0.0 - for (i <- 0 until topology(l + 1)) { + i = 0 + while( i < topology(l + 1)) { cum = cum + arrWeights(ofsWeight(l + 1) + (topology(l) + 1) * i + j) * arrDelta(ofsNode(l + 1) + i) * arrNodes(ofsNode(l) + j) * (1 - arrNodes(ofsNode(l) + j)) + i += 1 } arrDelta(ofsNode(l) + j) = cum + j += 1 } + l -= 1 } // gradient val arrGrad = new Array[Double](noWeights) - for (l <- 1 to L) { - for (j <- 0 until topology(l)) { - for (i <- 0 until topology(l - 1)) { + l = 1 + while(l <= L) { + j = 0 + while(j < topology(l)) { + i = 0 + while(i < topology(l - 1)) { arrGrad(ofsWeight(l) + (topology(l - 1) + 1) * j + i) = arrNodes(ofsNode(l - 1) + i) * arrDelta(ofsNode(l) + j) + i += 1 } arrGrad(ofsWeight(l) + (topology(l - 1) + 1) * j + topology(l - 1)) = arrDelta(ofsNode(l) + j) + j += 1 } + l += 1 } (Vectors.dense(arrGrad), err) } override def compute( - data: Vector, - label: Double, - weights: Vector, - cumGradient: Vector): Double = { + data: Vector, + label: Double, + weights: Vector, + cumGradient: Vector): Double = { val (grad, err) = compute(data, label, weights) cumGradient.toBreeze += grad.toBreeze return err @@ -338,14 +413,15 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { private class ANNUpdater extends Updater { override def compute( - weightsOld: Vector, - gradient: Vector, - stepSize: Double, - iter: Int, - regParam: Double): (Vector, Double) = { + weightsOld: Vector, + gradient: Vector, + stepSize: Double, + iter: Int, + regParam: Double): (Vector, Double) = { val thisIterStepSize = stepSize val brzWeights: BV[Double] = weightsOld.toBreeze.toDenseVector brzAxpy(-thisIterStepSize, gradient.toBreeze, brzWeights) (Vectors.fromBreeze(brzWeights), 0) } + } From 8e0dc8b772ee0d51e26debbde6ca7784c7180177 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Wed, 10 Sep 2014 14:01:46 +0800 Subject: [PATCH 101/143] Update and rename TestParallelANN.scala to TestANN.scala Adapted test to new interface --- .../org/apache/spark/mllib/ann/TestANN.scala | 581 ++++++++++++++++++ .../spark/mllib/ann/TestParallelANN.scala | 269 -------- 2 files changed, 581 insertions(+), 269 deletions(-) create mode 100644 mllib/src/test/scala/org/apache/spark/mllib/ann/TestANN.scala delete mode 100644 mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestANN.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestANN.scala new file mode 100644 index 0000000000000..9e6f59df3a11e --- /dev/null +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestANN.scala @@ -0,0 +1,581 @@ +/* + * Licensed to the 
Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package org.apache.spark.mllib.ann + +import java.awt._ +import java.awt.event._ +import java.text.SimpleDateFormat +import java.util.Calendar +import org.apache.spark._ +import org.apache.spark.mllib.ann._ +import org.apache.spark.mllib.linalg._ +import org.apache.spark.mllib.regression._ +import org.apache.spark.rdd.RDD +import scala.Array.canBuildFrom +import scala.util.Random + +object windowAdapter extends WindowAdapter { + + override def windowClosing( e: WindowEvent ) { + System.exit(0) + } + +} + +class OutputCanvas2D( wd: Int, ht: Int ) extends Canvas { + + var points: Array[Vector] = null + var approxPoints: Array[Vector] = null + + /* input: rdd of (x,y) vectors */ + def setData( rdd: RDD[Vector] ) { + points = rdd.collect + repaint + } + + def setApproxPoints( rdd: RDD[Vector] ) { + approxPoints = rdd.collect + repaint + } + + def plotDot( g: Graphics, x: Int, y: Int ) { + val r = 5 + val noSamp = 6*r + var x1 = x + var y1 = y + r + for( j <- 1 to noSamp ) { + val x2 = (x.toDouble + math.sin( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt + val y2 = (y.toDouble + math.cos( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt + g.drawLine( x1, ht - y1, x2, ht - y2 ) + x1 = x2 + y1 = y2 + } + } + + override def paint( g: Graphics) = { + + var xmax: Double = 0.0 + var xmin: Double = 0.0 + var ymax: Double = 0.0 + var ymin: Double = 0.0 + + if( points!=null ) { + + g.setColor( Color.black ) + val x = points.map( T => (T.toArray)(0) ) + val y = points.map( T => (T.toArray)(1) ) + + xmax = x.max + xmin = x.min + ymax = y.max + ymin = y.min + + for( i <- 0 to x.size - 1 ) { + + val xr = (((x(i).toDouble - xmin)/(xmax - xmin))*wd + .5).toInt + val yr = (((y(i).toDouble - ymin)/(ymax - ymin))*ht + .5).toInt + plotDot( g, xr, yr ) + + } + + if( approxPoints != null ) { + + g.setColor( Color.red ) + val x = approxPoints.map( T => (T.toArray)(0) ) + val y = approxPoints.map( T => (T.toArray)(1) ) + + for( i <- 0 to x.size-1 ) { + val xr = (((x(i).toDouble - xmin)/(xmax - xmin))*wd + .5).toInt + val yr = (((y(i).toDouble - ymin)/(ymax - ymin))*ht + .5).toInt + plotDot( g, xr, yr ) + } + + } + + } + + } + +} + +class OutputFrame2D( title: String ) extends Frame( title ) { + + val wd = 800 + val ht = 600 + + var outputCanvas = new OutputCanvas2D( wd, ht ) + + def apply() { + addWindowListener( windowAdapter ) + setSize( wd, ht ) + add( "Center", outputCanvas ) + show() + } + + def setData( rdd: RDD[Vector] ) { + outputCanvas.setData( rdd ) + } + + def setApproxPoints( rdd: RDD[Vector] ) { + outputCanvas.setApproxPoints( rdd ) + } + + +} + +object windowAdapter3D extends WindowAdapter { + + override def windowClosing( e: WindowEvent ) { + System.exit(0) + } + +} + +class OutputCanvas3D( wd: Int, ht: Int, shadowFrac: Double ) 
extends Canvas { + + var angle: Double = 0 + var points: Array[Vector] = null + var approxPoints: Array[Vector] = null + + /* 3 dimensional (x,y,z) vector */ + def setData( rdd: RDD[Vector] ) { + points = rdd.collect + repaint + } + + def setApproxPoints( rdd: RDD[Vector] ) { + approxPoints = rdd.collect + repaint + } + + def plotDot( g: Graphics, x: Int, y: Int ) { + val r = 5 + val noSamp = 6*r + var x1 = x + var y1 = y + r + for( j <- 1 to noSamp ) { + val x2 = (x.toDouble + math.sin( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt + val y2 = (y.toDouble + math.cos( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt + g.drawLine( x1, ht - y1, x2, ht - y2 ) + x1 = x2 + y1 = y2 + } + } + + def plotLine( g: Graphics, x1: Int, y1: Int, x2: Int, y2: Int ) { + g.drawLine( x1, ht - y1, x2, ht - y2 ) + } + + def calcCord( arr: Array[Double], angle: Double ): (Double, Double, Double, Double, Double, Double) = { + + var arrOut = new Array[Double](6) + + val x = arr(0)*math.cos( angle ) - arr(1)*math.sin( angle ) + val y = arr(0)*math.sin( angle ) + arr(1)*math.cos( angle ) + val z = arr(2) + + val x0 = arr(0)*math.cos( angle ) - arr(1)*math.sin( angle ) + val y0 = arr(0)*math.sin( angle ) + arr(1)*math.cos( angle ) + val z0 = 0 + + val xs = (arr(0) + shadowFrac*arr(2))*math.cos( angle ) - arr(1)*math.sin( angle ) + val ys = (arr(0) + shadowFrac*arr(2))*math.sin( angle ) + arr(1)*math.cos( angle ) + val zs = 0 + + arrOut(0) = y - .5*x + arrOut(1) = z - .25*x + + arrOut(2) = y0 - .5*x0 + arrOut(3) = z0 - .25*x0 + + arrOut(4) = ys - .5*xs + arrOut(5) = zs - .25*xs + + ( arrOut(0), arrOut(1), arrOut(2), arrOut(3), arrOut(4), arrOut(5) ) + + } + + override def paint( g: Graphics) = { + + if( points!=null ) { + + var p = points.map( T => calcCord( T.toArray, angle ) ).toArray + + var xmax = p(0)._1 + var xmin = p(0)._1 + var ymax = p(0)._2 + var ymin = p(0)._2 + + for( i <- 0 to p.size-1 ) { + + if( xmax<p(i)._1 ) { + xmax = p(i)._1 + } + if( xmax<p(i)._3 ) { + xmax = p(i)._3 + } + if( xmax<p(i)._5 ) { + xmax = p(i)._5 + } + + if( xmin>p(i)._1 ) { + xmin = p(i)._1 + } + if( xmin>p(i)._3 ) { + xmin = p(i)._3 + } + if( xmin>p(i)._5 ) { + xmin = p(i)._5 + } + + if( ymax<p(i)._2 ) { + ymax = p(i)._2 + } + if( ymax<p(i)._4 ) { + ymax = p(i)._4 + } + if( ymax<p(i)._6 ) { + ymax = p(i)._6 + } + + if( ymin>p(i)._2 ) { + ymin = p(i)._2 + } + if( ymin>p(i)._4 ) { + ymin = p(i)._4 + } + if( ymin>p(i)._6 ) { + ymin = p(i)._6 + } + + } + + for( i <- 0 to p.size-1 ) { + + var x_ = (((p(i)._1 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt + var y_ = (((p(i)._2 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt + var x0 = (((p(i)._3 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt + var y0 = (((p(i)._4 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt + var xs = (((p(i)._5 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt + var ys = (((p(i)._6 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt + + g.setColor( Color.black ) + plotDot( g, x_, y_ ) + plotLine( g, x_, y_, x0, y0 ) + g.setColor( Color.gray ) + plotLine( g, x0, y0, xs, ys ) + + } + + if( approxPoints != null ) { + + var p = approxPoints.map( T => calcCord( T.toArray, angle ) ) + + for( i <- 0 to p.size-1 ) { + + var x_ = (((p(i)._1 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt + var y_ = (((p(i)._2 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt + var x0 = (((p(i)._3 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt + var y0 = (((p(i)._4 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt + var xs = (((p(i)._5 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt + var ys = (((p(i)._6 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt + + g.setColor( Color.red ) + plotDot( g, x_, y_ ) + plotLine( g, x_, y_, x0, y0 ) + g.setColor( Color.magenta ) + plotLine( g, x0, y0, xs, ys ) + + } + + } + + } + } +} + +class OutputFrame3D( title: String, shadowFrac: Double )
extends Frame( title ) { + + val wd = 800 + val ht = 600 + + def this( title: String ) = this( title, .25 ) + + var outputCanvas = new OutputCanvas3D( wd, ht, shadowFrac ) + + def apply() { + addWindowListener( windowAdapter3D ) + setSize( wd, ht ) + add( "Center", outputCanvas ) + show() + } + + def setData( rdd: RDD[Vector] ) { + outputCanvas.setData( rdd ) + } + + def setAngle( angle: Double ) { + outputCanvas.angle = angle + } + + def setApproxPoints( rdd: RDD[Vector] ) { + outputCanvas.setApproxPoints( rdd ) + } + +} + +object TestANN { + + var rand = new Random( 0 ) + + def generateInput2D( f: Double => Double, xmin: Double, xmax: Double, noPoints: Int ): Array[(Vector,Vector)] = + { + + var out = new Array[(Vector,Vector)](noPoints) + + for( i <- 0 to noPoints - 1 ) { + val x = xmin + rand.nextDouble()*(xmax - xmin) + val y = f(x) + out(i) = ( Vectors.dense( x ), Vectors.dense( y ) ) + } + + return out + + } + + + def generateInput3D( f: (Double,Double) => Double, xmin: Double, xmax: Double, ymin: Double, ymax: Double, noPoints: Int ): Array[(Vector,Vector)] = { + + var out = new Array[(Vector,Vector)](noPoints) + + for( i <- 0 to noPoints - 1 ) { + + val x = xmin + rand.nextDouble()*(xmax - xmin) + val y = ymin + rand.nextDouble()*(ymax - ymin) + val z = f( x, y ) + + var arr = new Array[Double](2) + + arr(0) = x + arr(1) = y + out(i) = ( Vectors.dense( arr ), Vectors.dense( z ) ) + + } + + out + + } + + def generateInput4D( f: Double => (Double,Double,Double), tmin: Double, tmax: Double, noPoints: Int ): Array[(Vector,Vector)] = { + + var out = new Array[(Vector,Vector)](noPoints) + + for( i <- 0 to noPoints - 1 ) { + + val t: Double = tmin + rand.nextDouble()*(tmax - tmin) + var arr = new Array[Double](3) + var F = f(t) + + arr(0) = F._1 + arr(1) = F._2 + arr(2) = F._3 + + out(i) = ( Vectors.dense( t ), Vectors.dense( arr ) ) + } + + out + + } + + def f( T: Double ): Double = { + val y = 0.5 + Math.abs(T/5).toInt.toDouble*.15 + math.sin(T*math.Pi/10)*.1 + assert( y<= 1) + y + } + + def f3D( x: Double, y: Double ): Double = { + .5 + .24*Math.sin( x*2*math.Pi/10 ) + .24*Math.cos( y*2*math.Pi/10 ) + } + + def f4D( t: Double ): (Double, Double,Double) = { + val x = Math.abs(.8*Math.cos( t*2*math.Pi/20 ) ) + .1 + val y = (11 + t)/22 + val z = .5 + .35*Math.sin(t*2*math.Pi/5)*Math.cos( t*2*math.Pi/10 ) + .15*t/11 + ( x, y, z ) + } + + def concat( v1: Vector, v2: Vector ): Vector = { + + var a1 = v1.toArray + var a2 = v2.toArray + var a3 = new Array[Double]( a1.size + a2.size ) + + for( i <- 0 to a1.size - 1 ) { + a3(i) = a1(i) + } + + for( i <- 0 to a2.size - 1 ) { + a3(i + a1.size) = a2(i) + } + + Vectors.dense( a3 ) + + } + + def main( arg: Array[String] ) { + + println( "ANN tester" ) + println + + val formatter = new SimpleDateFormat("hh:mm:ss") + + var curAngle: Double = 0.0 + var graphic: Boolean = false + + if( (arg.length>0) && (arg(0)=="graph" ) ) { + graphic = true + } + + var outputFrame2D: OutputFrame2D = null + var outputFrame3D: OutputFrame3D = null + var outputFrame4D: OutputFrame3D = null + + if( graphic ) { + + outputFrame2D = new OutputFrame2D( "x -> y" ) + outputFrame2D.apply + + outputFrame3D = new OutputFrame3D( "(x,y) -> z", 1 ) + outputFrame3D.apply + + outputFrame4D = new OutputFrame3D( "t -> (x,y,z)" ) + outputFrame4D.apply + + } + + var A = 20.0 + var B = 50.0 + + var conf = new SparkConf().setAppName("Parallel ANN").setMaster("local[1]") + var sc = new SparkContext(conf) + + val testRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache 
+ val testRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 200 ), 2).cache + val testRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache + + val validationRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache + val validationRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache + val validationRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache + + if( graphic ) { + + outputFrame2D.setData( testRDD2D.map( T => concat( T._1, T._2 ) ) ) + outputFrame3D.setData( testRDD3D.map( T => concat( T._1, T._2 ) ) ) + outputFrame4D.setData( testRDD4D.map( T => T._2 ) ) + + } + + val starttime = Calendar.getInstance().getTime() + println( "Start training " + starttime ) + + var model2D = ArtificialNeuralNetwork.train( testRDD2D, Array[Int](1, 3, 3, 1), 1000, 1.0) + var model3D = ArtificialNeuralNetwork.train( testRDD3D, Array[Int](2, 20, 1), 1000, 1.0) + var model4D = ArtificialNeuralNetwork.train( testRDD4D, Array[Int](1, 20, 3), 1000, 1.0 ) + + val noIt = 1500 + var errHist = new Array[(Int,Double,Double,Double)]( noIt ) + + for( i <- 0 to noIt - 1 ) { + + val predictedAndTarget2D = validationRDD2D.map( T => ( T._1, T._2, model2D.predictV( T._1 ) ) ) + val predictedAndTarget3D = validationRDD3D.map( T => ( T._1, T._2, model3D.predictV( T._1 ) ) ) + val predictedAndTarget4D = validationRDD4D.map( T => ( T._1, T._2, model4D.predictV( T._1 ) ) ) + + var err2D = predictedAndTarget2D.map( T => + (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) + ).reduce( (u,v) => u + v ) + + var err3D = predictedAndTarget3D.map( T => + (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) + ).reduce( (u,v) => u + v ) + + var err4D = predictedAndTarget4D.map( T => { + + val v1 = T._2.toArray + val v2 = T._3.toArray + + (v1(0) - v2(0))*(v1(0) - v2(0)) + + (v1(1) - v2(1))*(v1(1) - v2(1)) + + (v1(2) - v2(2))*(v1(2) - v2(2)) + + } ).reduce( (u,v) => u + v ) + + + if( graphic ) { + + val predicted2D = predictedAndTarget2D.map( + T => concat( T._1, T._3 ) + ) + + val predicted3D = predictedAndTarget3D.map( + T => concat( T._1, T._3 ) + ) + + val predicted4D = predictedAndTarget4D.map( + T => T._3 + ) + + curAngle = curAngle + math.Pi/4 + if( curAngle>=2*math.Pi ) { + curAngle = curAngle - 2*math.Pi + } + + outputFrame3D.setAngle( curAngle ) + outputFrame4D.setAngle( curAngle ) + + outputFrame2D.setApproxPoints( predicted2D ) + outputFrame3D.setApproxPoints( predicted3D ) + outputFrame4D.setApproxPoints( predicted4D ) + + } + + println( "It. 
"+i+" ("+Calendar.getInstance().getTime()+"), Error 2D/3D/4D: " + (err2D, err3D, err4D) ) + errHist(i) = ( i, err2D, err3D, err4D ) + + if( i < noIt - 1 ) { + model2D = ArtificialNeuralNetwork.train(testRDD2D, model2D, 1000, 1.0) + model3D = ArtificialNeuralNetwork.train(testRDD3D, model3D, 1000, 1.0) + model4D = ArtificialNeuralNetwork.train(testRDD4D, model4D, 1000, 1.0) + } + + } + + sc.stop + + val stoptime = Calendar.getInstance().getTime() + + for( i <- 0 to noIt - 1 ) { + println( errHist(i) ) + } + + println( formatter.format( starttime )+"-" + formatter.format( stoptime ) + " "+(stoptime.getTime-starttime.getTime+500)/1000+" seconds" ) + + } + +} diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala deleted file mode 100644 index b528da19dbfb9..0000000000000 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANN.scala +++ /dev/null @@ -1,269 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - - -package org.apache.spark.mllib.ann - -import java.text.SimpleDateFormat -import java.util.Calendar - -import org.apache.spark._ -import org.apache.spark.mllib.linalg._ - -import scala.util.Random - -object TestParallelANN { - - var rand = new Random(0) - - def generateInput2D(f: Double => Double, xmin: Double, xmax: Double, noPoints: Int): - Array[(Vector, Vector)] = { - - var out = new Array[(Vector, Vector)](noPoints) - - for (i <- 0 to noPoints - 1) { - val x = xmin + rand.nextDouble() * (xmax - xmin) - val y = f(x) - out(i) = (Vectors.dense(x), Vectors.dense(y)) - } - - return out - - } - - - def generateInput3D(f: (Double, Double) => Double, xmin: Double, xmax: Double, ymin: Double, ymax: Double, noPoints: Int): Array[(Vector, Vector)] = { - - var out = new Array[(Vector, Vector)](noPoints) - - for (i <- 0 to noPoints - 1) { - - val x = xmin + rand.nextDouble() * (xmax - xmin) - val y = ymin + rand.nextDouble() * (ymax - ymin) - val z = f(x, y) - - var arr = new Array[Double](2) - - arr(0) = x - arr(1) = y - out(i) = (Vectors.dense(arr), Vectors.dense(z)) - - } - - out - - } - - def generateInput4D(f: Double => (Double, Double, Double), tmin: Double, tmax: Double, noPoints: Int): Array[(Vector, Vector)] = { - - var out = new Array[(Vector, Vector)](noPoints) - - for (i <- 0 to noPoints - 1) { - - val t: Double = tmin + rand.nextDouble() * (tmax - tmin) - var arr = new Array[Double](3) - var F = f(t) - - arr(0) = F._1 - arr(1) = F._2 - arr(2) = F._3 - - out(i) = (Vectors.dense(t), Vectors.dense(arr)) - } - - out - - } - - def f(T: Double): Double = { - val y = 0.5 + Math.abs(T / 5).toInt.toDouble * .15 + math.sin(T * math.Pi / 10) * .1 - assert(y <= 1) - y - } - - def f3D(x: Double, y: Double): Double = { - .5 +.24 * Math.sin(x * 2 * math.Pi / 10) +.24 * Math.cos(y * 2 * math.Pi / 10) - } - - def f4D(t: Double): (Double, Double, Double) = { - val x = Math.abs(.8 * Math.cos(t * 2 * math.Pi / 20)) + .1 - val y = (11 + t) / 22 - val z =.5 +.35 * Math.sin(t * 2 * math.Pi / 5) * Math.cos(t * 2 * math.Pi / 10) +.15 * t / 11 - (x, y, z) - } - - def concat(v1: Vector, v2: Vector): Vector = { - - var a1 = v1.toArray - var a2 = v2.toArray - var a3 = new Array[Double](a1.size + a2.size) - - for (i <- 0 to a1.size - 1) { - a3(i) = a1(i) - } - - for (i <- 0 to a2.size - 1) { - a3(i + a1.size) = a2(i) - } - - Vectors.dense(a3) - - } - - def main(arg: Array[String]) { - - println("Parallel ANN tester") - println - - val formatter = new SimpleDateFormat("hh:mm:ss") - - var curAngle: Double = 0.0 - var graphic: Boolean = false - - if ((arg.length > 0) && (arg(0) == "graph")) { - graphic = true - } - - var outputFrame2D: OutputFrame2D = null - var outputFrame3D: OutputFrame3D = null - var outputFrame4D: OutputFrame3D = null - - if (graphic) { - - outputFrame2D = new OutputFrame2D("x -> y") - outputFrame2D.apply - - outputFrame3D = new OutputFrame3D("(x,y) -> z", 1) - outputFrame3D.apply - - outputFrame4D = new OutputFrame3D("t -> (x,y,z)") - outputFrame4D.apply - - } - - var A = 20.0 - var B = 50.0 - - var conf = new SparkConf().setAppName("Parallel ANN").setMaster("local[1]") - var sc = new SparkContext(conf) - - val testRDD2D = sc.parallelize(generateInput2D(T => f(T), -10, 10, 100), 2).cache - val testRDD3D = sc.parallelize(generateInput3D((x, y) => f3D(x, y), -10, 10, -10, 10, 200), 2).cache - val testRDD4D = sc.parallelize(generateInput4D(t => f4D(t), -10, 10, 100), 2).cache - - val validationRDD2D = sc.parallelize(generateInput2D(T => f(T), -10, 10, 100), 2).cache - val validationRDD3D = 
sc.parallelize(generateInput3D((x, y) => f3D(x, y), -10, 10, -10, 10, 100), 2).cache - val validationRDD4D = sc.parallelize(generateInput4D(t => f4D(t), -10, 10, 100), 2).cache - - if (graphic) { - - outputFrame2D.setData(testRDD2D.map(T => concat(T._1, T._2))) - outputFrame3D.setData(testRDD3D.map(T => concat(T._1, T._2))) - outputFrame4D.setData(testRDD4D.map(T => T._2)) - - } - - val starttime = Calendar.getInstance().getTime() - println("Start training " + starttime) - - val numIterations = 1000 - val stepSize = 1.0 - var model2D = ArtificialNeuralNetwork.train(testRDD2D, Array[Int](1, 3, 3, 1), numIterations, stepSize) - var model3D = ArtificialNeuralNetwork.train(testRDD3D, Array[Int](2, 20, 1), numIterations, stepSize) - var model4D = ArtificialNeuralNetwork.train(testRDD4D, Array[Int](1, 20, 3), numIterations, stepSize) - - val noIt = 1500 - var errHist = new Array[(Int, Double, Double, Double)](noIt) - - for (i <- 0 to noIt - 1) { - - val predictedAndTarget2D = validationRDD2D.map(T => (T._1, T._2, model2D.predictV(T._1))) - val predictedAndTarget3D = validationRDD3D.map(T => (T._1, T._2, model3D.predictV(T._1))) - val predictedAndTarget4D = validationRDD4D.map(T => (T._1, T._2, model4D.predictV(T._1))) - - var err2D = predictedAndTarget2D.map(T => - (T._3.toArray(0) - T._2.toArray(0)) * (T._3.toArray(0) - T._2.toArray(0)) - ).reduce((u, v) => u + v) - - var err3D = predictedAndTarget3D.map(T => - (T._3.toArray(0) - T._2.toArray(0)) * (T._3.toArray(0) - T._2.toArray(0)) - ).reduce((u, v) => u + v) - - var err4D = predictedAndTarget4D.map(T => { - - val v1 = T._2.toArray - val v2 = T._3.toArray - - (v1(0) - v2(0)) * (v1(0) - v2(0)) + - (v1(1) - v2(1)) * (v1(1) - v2(1)) + - (v1(2) - v2(2)) * (v1(2) - v2(2)) - - }).reduce((u, v) => u + v) - - - if (graphic) { - - val predicted2D = predictedAndTarget2D.map( - T => concat(T._1, T._3) - ) - - val predicted3D = predictedAndTarget3D.map( - T => concat(T._1, T._3) - ) - - val predicted4D = predictedAndTarget4D.map( - T => T._3 - ) - - curAngle = curAngle + math.Pi / 4 - if (curAngle >= 2 * math.Pi) { - curAngle = curAngle - 2 * math.Pi - } - - outputFrame3D.setAngle(curAngle) - outputFrame4D.setAngle(curAngle) - - outputFrame2D.setApproxPoints(predicted2D) - outputFrame3D.setApproxPoints(predicted3D) - outputFrame4D.setApproxPoints(predicted4D) - - } - - println("It. 
" + i + " (" + Calendar.getInstance().getTime() + "), Error 2D/3D/4D: " +(err2D, err3D, err4D)) - errHist(i) = (i, err2D, err3D, err4D) - - if (i < noIt - 1) { - model2D = ArtificialNeuralNetwork.train(testRDD2D, model2D.topology, model2D.weights, numIterations, stepSize) - model3D = ArtificialNeuralNetwork.train(testRDD3D, model3D.topology, model3D.weights, numIterations, stepSize) - model4D = ArtificialNeuralNetwork.train(testRDD4D, model4D.topology, model4D.weights, numIterations, stepSize) - } - - } - - sc.stop - - val stoptime = Calendar.getInstance().getTime() - - for (i <- 0 to noIt - 1) { - println(errHist(i)) - } - - println(formatter.format(starttime) + "-" + formatter.format(stoptime) + " " + (stoptime.getTime - starttime.getTime + 500) / 1000 + " seconds") - - } - -} From 0a3fca6f45531a8f15ff2c2fd7887c65c18a1a23 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Wed, 10 Sep 2014 14:04:56 +0800 Subject: [PATCH 102/143] Delete TestParallelANNgraphics.scala Combined with TestANN --- .../mllib/ann/TestParallelANNgraphics.scala | 334 ------------------ 1 file changed, 334 deletions(-) delete mode 100644 mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala deleted file mode 100644 index e206a8b7072a3..0000000000000 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestParallelANNgraphics.scala +++ /dev/null @@ -1,334 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.mllib.ann - -import java.awt._ -import java.awt.event._ -import org.apache.spark.rdd.RDD -import org.apache.spark.mllib.linalg.Vector -import scala.Array.canBuildFrom - -object windowAdapter extends WindowAdapter { - - override def windowClosing( e: WindowEvent ) { - System.exit(0) - } - -} - -class OutputCanvas2D( wd: Int, ht: Int ) extends Canvas { - - var points: Array[Vector] = null - var approxPoints: Array[Vector] = null - - /* input: rdd of (x,y) vectors */ - def setData( rdd: RDD[Vector] ) { - points = rdd.collect - repaint - } - - def setApproxPoints( rdd: RDD[Vector] ) { - approxPoints = rdd.collect - repaint - } - - def plotDot( g: Graphics, x: Int, y: Int ) { - val r = 5 - val noSamp = 6*r - var x1 = x - var y1 = y + r - for( j <- 1 to noSamp ) { - val x2 = (x.toDouble + math.sin( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt - val y2 = (y.toDouble + math.cos( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt - g.drawLine( x1, ht - y1, x2, ht - y2 ) - x1 = x2 - y1 = y2 - } - } - - override def paint( g: Graphics) = { - - var xmax: Double = 0.0 - var xmin: Double = 0.0 - var ymax: Double = 0.0 - var ymin: Double = 0.0 - - if( points!=null ) { - - g.setColor( Color.black ) - val x = points.map( T => (T.toArray)(0) ) - val y = points.map( T => (T.toArray)(1) ) - - xmax = x.max - xmin = x.min - ymax = y.max - ymin = y.min - - for( i <- 0 to x.size - 1 ) { - - val xr = (((x(i).toDouble - xmin)/(xmax - xmin))*wd + .5).toInt - val yr = (((y(i).toDouble - ymin)/(ymax - ymin))*ht + .5).toInt - plotDot( g, xr, yr ) - - } - - if( approxPoints != null ) { - - g.setColor( Color.red ) - val x = approxPoints.map( T => (T.toArray)(0) ) - val y = approxPoints.map( T => (T.toArray)(1) ) - - for( i <- 0 to x.size-1 ) { - val xr = (((x(i).toDouble - xmin)/(xmax - xmin))*wd + .5).toInt - val yr = (((y(i).toDouble - ymin)/(ymax - ymin))*ht + .5).toInt - plotDot( g, xr, yr ) - } - - } - - } - - } - -} - -class OutputFrame2D( title: String ) extends Frame( title ) { - - val wd = 800 - val ht = 600 - - var outputCanvas = new OutputCanvas2D( wd, ht ) - - def apply() { - addWindowListener( windowAdapter ) - setSize( wd, ht ) - add( "Center", outputCanvas ) - show() - } - - def setData( rdd: RDD[Vector] ) { - outputCanvas.setData( rdd ) - } - - def setApproxPoints( rdd: RDD[Vector] ) { - outputCanvas.setApproxPoints( rdd ) - } - - -} - -object windowAdapter3D extends WindowAdapter { - - override def windowClosing( e: WindowEvent ) { - System.exit(0) - } - -} - -class OutputCanvas3D( wd: Int, ht: Int, shadowFrac: Double ) extends Canvas { - - var angle: Double = 0 - var points: Array[Vector] = null - var approxPoints: Array[Vector] = null - - /* 3 dimensional (x,y,z) vector */ - def setData( rdd: RDD[Vector] ) { - points = rdd.collect - repaint - } - - def setApproxPoints( rdd: RDD[Vector] ) { - approxPoints = rdd.collect - repaint - } - - def plotDot( g: Graphics, x: Int, y: Int ) { - val r = 5 - val noSamp = 6*r - var x1 = x - var y1 = y + r - for( j <- 1 to noSamp ) { - val x2 = (x.toDouble + math.sin( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt - val y2 = (y.toDouble + math.cos( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt - g.drawLine( x1, ht - y1, x2, ht - y2 ) - x1 = x2 - y1 = y2 - } - } - - def plotLine( g: Graphics, x1: Int, y1: Int, x2: Int, y2: Int ) { - g.drawLine( x1, ht - y1, x2, ht - y2 ) - } - - def calcCord( arr: Array[Double], angle: Double ): (Double, Double, Double, Double, Double, Double) = { - - var arrOut = new Array[Double](6) - - val x = arr(0)*math.cos( 
angle ) - arr(1)*math.sin( angle ) - val y = arr(0)*math.sin( angle ) + arr(1)*math.cos( angle ) - val z = arr(2) - - val x0 = arr(0)*math.cos( angle ) - arr(1)*math.sin( angle ) - val y0 = arr(0)*math.sin( angle ) + arr(1)*math.cos( angle ) - val z0 = 0 - - val xs = (arr(0) + shadowFrac*arr(2))*math.cos( angle ) - arr(1)*math.sin( angle ) - val ys = (arr(0) + shadowFrac*arr(2))*math.sin( angle ) + arr(1)*math.cos( angle ) - val zs = 0 - - arrOut(0) = y - .5*x - arrOut(1) = z - .25*x - - arrOut(2) = y0 - .5*x0 - arrOut(3) = z0 - .25*x0 - - arrOut(4) = ys - .5*xs - arrOut(5) = zs - .25*xs - - ( arrOut(0), arrOut(1), arrOut(2), arrOut(3), arrOut(4), arrOut(5) ) - - } - - override def paint( g: Graphics) = { - - if( points!=null ) { - - var p = points.map( T => calcCord( T.toArray, angle ) ).toArray - - var xmax = p(0)._1 - var xmin = p(0)._1 - var ymax = p(0)._2 - var ymin = p(0)._2 - - for( i <- 0 to p.size-1 ) { - - if( xmax<p(i)._1 ) { - xmax = p(i)._1 - } - if( xmax<p(i)._3 ) { - xmax = p(i)._3 - } - if( xmax<p(i)._5 ) { - xmax = p(i)._5 - } - - if( xmin>p(i)._1 ) { - xmin = p(i)._1 - } - if( xmin>p(i)._3 ) { - xmin = p(i)._3 - } - if( xmin>p(i)._5 ) { - xmin = p(i)._5 - } - - if( ymax<p(i)._2 ) { - ymax = p(i)._2 - } - if( ymax<p(i)._4 ) { - ymax = p(i)._4 - } - if( ymax<p(i)._6 ) { - ymax = p(i)._6 - } - - if( ymin>p(i)._2 ) { - ymin = p(i)._2 - } - if( ymin>p(i)._4 ) { - ymin = p(i)._4 - } - if( ymin>p(i)._6 ) { - ymin = p(i)._6 - } - - } - - for( i <- 0 to p.size-1 ) { - - var x_ = (((p(i)._1 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt - var y_ = (((p(i)._2 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt - var x0 = (((p(i)._3 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt - var y0 = (((p(i)._4 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt - var xs = (((p(i)._5 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt - var ys = (((p(i)._6 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt - - g.setColor( Color.black ) - plotDot( g, x_, y_ ) - plotLine( g, x_, y_, x0, y0 ) - g.setColor( Color.gray ) - plotLine( g, x0, y0, xs, ys ) - - } - - if( approxPoints != null ) { - - var p = approxPoints.map( T => calcCord( T.toArray, angle ) ) - - for( i <- 0 to p.size-1 ) { - - var x_ = (((p(i)._1 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt - var y_ = (((p(i)._2 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt - var x0 = (((p(i)._3 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt - var y0 = (((p(i)._4 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt - var xs = (((p(i)._5 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt - var ys = (((p(i)._6 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt - - g.setColor( Color.red ) - plotDot( g, x_, y_ ) - plotLine( g, x_, y_, x0, y0 ) - g.setColor( Color.magenta ) - plotLine( g, x0, y0, xs, ys ) - - } - - } - - } - } -} - -class OutputFrame3D( title: String, shadowFrac: Double ) extends Frame( title ) { - - val wd = 800 - val ht = 600 - - def this( title: String ) = this( title, .25 ) - - var outputCanvas = new OutputCanvas3D( wd, ht, shadowFrac ) - - def apply() { - addWindowListener( windowAdapter3D ) - setSize( wd, ht ) - add( "Center", outputCanvas ) - show() - } - - def setData( rdd: RDD[Vector] ) { - outputCanvas.setData( rdd ) - } - - def setAngle( angle: Double ) { - outputCanvas.angle = angle - } - - def setApproxPoints( rdd: RDD[Vector] ) { - outputCanvas.setApproxPoints( rdd ) - } - -} From 50ca819b0b4a53c436cb921891b65d91f3cdef4e Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Mon, 15 Sep 2014 09:25:18 +0800 Subject: [PATCH 103/143] Update ArtificialNeuralNetwork.scala Changed "X = X + 1" to "X += 1".
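This style change goes together with the earlier rewrite of the inner loops from for comprehensions to while loops: in the Scala version Spark used at the time, "for (i <- 0 until n)" desugars into a closure passed to Range.foreach, while a bare while loop with a mutable counter compiles to a plain jump and avoids per-iteration overhead in the gradient hot path. A minimal sketch of the resulting idiom (illustrative only, not part of the patch):

def sumSquares(arr: Array[Double]): Double = {
  // Accumulate with a bare while loop; no per-element closure is involved.
  var acc = 0.0
  var i = 0
  while (i < arr.length) {
    acc += arr(i) * arr(i)  // "acc = acc + ..." and "acc += ..." are equivalent
    i += 1                  // the "X += 1" form this commit standardizes on
  }
  acc
}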
--- .../mllib/ann/ArtificialNeuralNetwork.scala | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index 9294c46727f5c..bc7e3e4b6ceec 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -81,7 +81,7 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol while(l <= L) { tmp(l) = curPos curPos = curPos + (topology(l - 1) + 1) * (topology(l)) - l = l + 1 + l += 1 } tmp } @@ -96,7 +96,7 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol i = 0 while(i < topology(0)) { arrPrev(i) = arrData(i) - i = i + 1 + i += 1 } l = 1 while(l <= L) { @@ -108,15 +108,15 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol while( i < topology(l - 1) ) { cum = cum + arrPrev(i) * arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + i) - i = i + 1 + i += 1 } cum = cum + arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + topology(l - 1)) // bias arrCur(j) = g(cum) - j = j + 1 + j += 1 } arrPrev = arrCur - l = l + 1 + l += 1 } arrPrev } @@ -201,7 +201,6 @@ object ArtificialNeuralNetwork { train(input, model.topology, model.weights, numIterations, stepSize) } - def train( input: RDD[(Vector, Vector)], topology: Array[Int], @@ -222,23 +221,23 @@ object ArtificialNeuralNetwork { def randomWeights(topology: Array[Int]): Vector = { val rand = new XORShiftRandom() - + var i: Int = 0 var l: Int = 0 - + val noWeights = { var tmp = 0 var i = 1 while(i < topology.size) { tmp = tmp + topology(i) * (topology(i - 1) + 1) - i = i + 1 + i += 1 } tmp } val initialWeightsArr = new Array[Double](noWeights) var pos = 0; - + l = 1 while( l < topology.length) { i = 0 @@ -308,11 +307,11 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { val arrData = data.toArray val arrWeights = weights.toArray val arrNodes = new Array[Double](noNodes) - + var i: Int = 0 var j: Int = 0 var l: Int = 0 - + // forward run i = 0; while(i < topology(0)) { @@ -343,6 +342,7 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { arrDiff(j) = (arrNodes(ofsNode(L) + j) - arrData(topology(0) + j)) j += 1 } + var err: Double = 0; j = 0 while(j < topology(L)) { From c2da9b07d5313ab1ac4a101f2f0aa88846f82716 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Mon, 15 Sep 2014 09:26:05 +0800 Subject: [PATCH 104/143] Update ANNSuite.scala Added test for gradient. 
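The test below validates the analytic backpropagation gradient numerically: perturb a single weight by a small eps, recompute the error, and require that the finite difference (E2 - E1) / eps stays within accept of the corresponding component returned by the analytic gradient. A self-contained sketch of that technique with illustrative names (f stands for the network error as a function of the weights, grad for the analytic gradient; the actual test is built around ArtificialNeuralNetworkModel instances with random topologies and weights):

// Sketch of a forward-finite-difference gradient check; names are assumptions.
def checkGradient(
    f: Array[Double] => Double,
    grad: Array[Double] => Array[Double],
    w: Array[Double],
    eps: Double,
    accept: Double): Boolean = {
  val analytic = grad(w)
  val e1 = f(w)
  w.indices.forall { k =>
    val wPlus = w.clone()
    wPlus(k) += eps                      // perturb one weight at a time
    val numeric = (f(wPlus) - e1) / eps  // forward finite difference
    math.abs(numeric - analytic(k)) < accept
  }
}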
--- .../org/apache/spark/mllib/ann/ANNSuite.scala | 116 ++++++++++++++++-- 1 file changed, 106 insertions(+), 10 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala index 692366699a5fb..c6ef5b79ed2f6 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala @@ -19,20 +19,22 @@ package org.apache.spark.mllib.ann import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.util.LocalSparkContext +import org.apache.spark.util.random.XORShiftRandom import org.scalatest.FunSuite class ANNSuite extends FunSuite with LocalSparkContext { - private val inputs = Array[Array[Double]]( - Array[Double](0,0), - Array[Double](0,1), - Array[Double](1,0), - Array[Double](1,1) - ) - private val outputs = Array[Double](0, 1, 1, 0) - private val inputSize = 2 - private val hiddenSize = 5 - private val outputSize = 1 + test("ANN learns XOR function") { + val inputs = Array[Array[Double]]( + Array[Double](0,0), + Array[Double](0,1), + Array[Double](1,0), + Array[Double](1,1) + ) + val outputs = Array[Double](0, 1, 1, 0) + val inputSize = 2 + val hiddenSize = 5 + val outputSize = 1 val data = inputs.zip(outputs).map { case(features, label) => (Vectors.dense(features), Vectors.dense(Array(label)))} val rddData = sc.parallelize(data, 2) @@ -42,4 +44,98 @@ class ANNSuite extends FunSuite with LocalSparkContext { (model.predictV(input)(0), label(0)) }.collect() assert(predictionAndLabels.forall { case(p, l) => (math.round(p) - l) == 0 }) } + + test("Gradient of ANN") { + + val eps = 1e-6 + val accept = 1e-7 + + val topologyArr = Array[Array[Int]]( + Array[Int](1, 5, 1), + Array[Int](5, 10, 5, 3), + Array[Int](128, 256, 128) + ) + + val rnd = new XORShiftRandom(0) + + var cnt = 0 + while( cnt < topologyArr.size ) {
From: Alexander Ulanov Date: Wed, 17 Sep 2014 16:09:15 +0400 Subject: [PATCH 105/143] minor style fixes --- .../mllib/ann/ArtificialNeuralNetwork.scala | 22 +++++++++---------- .../org/apache/spark/mllib/ann/ANNSuite.scala | 6 ++--- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index bc7e3e4b6ceec..aa0d42bedcbbf 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -80,7 +80,7 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol var l = 1 while(l <= L) { tmp(l) = curPos - curPos = curPos + (topology(l - 1) + 1) * (topology(l)) + curPos = curPos + (topology(l - 1) + 1) * topology(l) l += 1 } tmp @@ -236,14 +236,14 @@ object ArtificialNeuralNetwork { } val initialWeightsArr = new Array[Double](noWeights) - var pos = 0; + var pos = 0 l = 1 while( l < topology.length) { i = 0 while(i < (topology(l) * (topology(l - 1) + 1))) { initialWeightsArr(pos) = (rand.nextDouble * 4.8 - 2.4) / (topology(l - 1) + 1) - pos += 1; + pos += 1 i += 1 } l += 1 @@ -271,12 +271,12 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { val ofsWeight: Array[Int] = { val tmp = new Array[Int](L + 1) - var curPos = 0; - tmp(0) = 0; + var curPos = 0 + tmp(0) = 0 var l = 1 while(l <= L) { tmp(l) = curPos - curPos = curPos + (topology(l - 1) + 1) * (topology(l)) + curPos = curPos + (topology(l - 1) + 1) * topology(l) l += 1 } tmp @@
-313,7 +313,7 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { var l: Int = 0 // forward run - i = 0; + i = 0 while(i < topology(0)) { arrNodes(i) = arrData(i) i += 1 @@ -322,7 +322,7 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { while( l <= L ) { j = 0 while(j < topology(l)) { - var cum: Double = 0.0; + var cum: Double = 0.0 i = 0 while(i < topology(l - 1)) { cum = cum + @@ -339,11 +339,11 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { val arrDiff = new Array[Double](topology(L)) j = 0 while( j < topology(L)) { - arrDiff(j) = (arrNodes(ofsNode(L) + j) - arrData(topology(0) + j)) + arrDiff(j) = arrNodes(ofsNode(L) + j) - arrData(topology(0) + j) j += 1 } - var err: Double = 0; + var err: Double = 0 j = 0 while(j < topology(L)) { err = err + arrDiff(j) * arrDiff(j) @@ -406,7 +406,7 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { cumGradient: Vector): Double = { val (grad, err) = compute(data, label, weights) cumGradient.toBreeze += grad.toBreeze - return err + err } } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala index c6ef5b79ed2f6..ddf3ff9009ec4 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala @@ -70,7 +70,7 @@ class ANNSuite extends FunSuite with LocalSparkContext { var l = 1 while(l <= L) { - noWeights += (topology(l - 1) + 1)*(topology(l)) + noWeights += (topology(l - 1) + 1) * topology(l) l += 1 } @@ -120,8 +120,8 @@ class ANNSuite extends FunSuite with LocalSparkContext { val annModel2 = new ArtificialNeuralNetworkModel(tmpWeights, topology) val brzO2 = annModel2.predictV(data).toBreeze - val E1 = .5*((brzO1 - brzOut).dot(brzO1 - brzOut)) - val E2 = .5*((brzO2 - brzOut).dot(brzO2 - brzOut)) + val E1 = .5* (brzO1 - brzOut).dot(brzO1 - brzOut) + val E2 = .5* (brzO2 - brzOut).dot(brzO2 - brzOut) val dEdW = ( E2 - E1 ) / eps val gradw = gradient(w) From a024c6b04dc7256046b70c2bb80a85d1e3569036 Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Wed, 17 Sep 2014 18:11:20 +0400 Subject: [PATCH 106/143] Forward propagation code sharing --- .../mllib/ann/ArtificialNeuralNetwork.scala | 91 ++++++------------- 1 file changed, 27 insertions(+), 64 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index aa0d42bedcbbf..ac13119fd9d40 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -69,56 +69,11 @@ import org.apache.spark.util.random.XORShiftRandom */ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topology: Array[Int]) - extends Serializable { - - private val L = topology.length - 1 - - private val ofsWeight: Array[Int] = { - val tmp = new Array[Int](L + 1) - var curPos = 0 - tmp(0) = 0 - var l = 1 - while(l <= L) { - tmp(l) = curPos - curPos = curPos + (topology(l - 1) + 1) * topology(l) - l += 1 - } - tmp - } - - private def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) + extends Serializable with ANNHelper { def computeValues(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { - var arrPrev = new Array[Double](topology(0)) - var i: Int = 0 - var j: Int = 0 - var l: Int = 
0 - i = 0 - while(i < topology(0)) { - arrPrev(i) = arrData(i) - i += 1 - } - l = 1 - while(l <= L) { - val arrCur = new Array[Double](topology(l)) - j = 0 - while(j < topology(l)) { - var cum: Double = 0.0 - i = 0 - while( i < topology(l - 1) ) { - cum = cum + - arrPrev(i) * arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + i) - i += 1 - } - cum = cum + - arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + topology(l - 1)) // bias - arrCur(j) = g(cum) - j += 1 - } - arrPrev = arrCur - l += 1 - } - arrPrev + val arrNodes = forwardRun(arrData, arrWeights) + arrNodes.slice(arrNodes.size - topology(L), arrNodes.size) } def predictPoint(data: Vector, weights: Vector): Double = { @@ -250,16 +205,14 @@ object ArtificialNeuralNetwork { } Vectors.dense(initialWeightsArr) } - } -private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { - - private def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) +private[ann] trait ANNHelper { + protected val topology: Array[Int] + protected def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) + protected val L = topology.length - 1 - private val L = topology.length - 1 - - private val noWeights = { + protected val noWeights = { var tmp = 0 var l = 1 while(l <= L) { @@ -269,7 +222,7 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { tmp } - val ofsWeight: Array[Int] = { + protected val ofsWeight: Array[Int] = { val tmp = new Array[Int](L + 1) var curPos = 0 tmp(0) = 0 @@ -282,7 +235,7 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { tmp } - val noNodes: Int = { + protected val noNodes: Int = { var tmp: Integer = 0 var l = 0 while(l < topology.size) { @@ -292,7 +245,7 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { tmp } - val ofsNode: Array[Int] = { + protected val ofsNode: Array[Int] = { val tmp = new Array[Int](L + 1) tmp(0) = 0 var l = 1 @@ -303,16 +256,11 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { tmp } - override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { - val arrData = data.toArray - val arrWeights = weights.toArray + protected def forwardRun(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { val arrNodes = new Array[Double](noNodes) - var i: Int = 0 var j: Int = 0 var l: Int = 0 - - // forward run i = 0 while(i < topology(0)) { arrNodes(i) = arrData(i) @@ -336,6 +284,21 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { } l += 1 } + arrNodes + } +} + +private class ANNLeastSquaresGradient(val topology: Array[Int]) extends Gradient with ANNHelper { + + override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { + val arrData = data.toArray + val arrWeights = weights.toArray + + var i: Int = 0 + var j: Int = 0 + var l: Int = 0 + // forward run + val arrNodes = forwardRun(arrData, arrWeights) val arrDiff = new Array[Double](topology(L)) j = 0 while( j < topology(L)) { From 95e5299032b2c5de167f9bade65c4ed640377d24 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Mon, 22 Sep 2014 16:22:27 +0800 Subject: [PATCH 107/143] Update ArtificialNeuralNetwork.scala Changed optimiser to LBFGS --- .../mllib/ann/ArtificialNeuralNetwork.scala | 192 ++++++++++-------- 1 file changed, 112 insertions(+), 80 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index 
ac13119fd9d40..fe200f8c1aae2 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -69,52 +69,86 @@ import org.apache.spark.util.random.XORShiftRandom */ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topology: Array[Int]) - extends Serializable with ANNHelper { + extends Serializable { - def computeValues(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { - val arrNodes = forwardRun(arrData, arrWeights) - arrNodes.slice(arrNodes.size - topology(L), arrNodes.size) - } + private val L = topology.length - 1 - def predictPoint(data: Vector, weights: Vector): Double = { - val outp = computeValues(data.toArray, weights.toArray) - outp(0) + private val ofsWeight: Array[Int] = { + val tmp = new Array[Int](L + 1) + var curPos = 0 + tmp(0) = 0 + var l = 1 + while(l <= L) { + tmp(l) = curPos + curPos = curPos + (topology(l - 1) + 1) * (topology(l)) + l += 1 + } + tmp } - def predictPointV(data: Vector, weights: Vector): Vector = { - Vectors.dense(computeValues(data.toArray, weights.toArray)) + private def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) + + def computeValues(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { + var arrPrev = new Array[Double](topology(0)) + var i: Int = 0 + var j: Int = 0 + var l: Int = 0 + i = 0 + while(i < topology(0)) { + arrPrev(i) = arrData(i) + i += 1 + } + l = 1 + while(l <= L) { + val arrCur = new Array[Double](topology(l)) + j = 0 + while(j < topology(l)) { + var cum: Double = 0.0 + i = 0 + while( i < topology(l - 1) ) { + cum = cum + + arrPrev(i) * arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + i) + i += 1 + } + cum = cum + + arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + topology(l - 1)) // bias + arrCur(j) = g(cum) + j += 1 + } + arrPrev = arrCur + l += 1 + } + arrPrev } /** * Predict values for a single data point using the model trained. * - * @param testData array representing a single data point + * @param testData Vector representing a single data point * @return Vector prediction from the trained model * * Returns the complete vector. */ def predictV(testData: Vector): Vector = { - predictPointV(testData, weights) + Vectors.dense(computeValues(testData.toArray, weights.toArray)) } } class ArtificialNeuralNetwork private( - private var topology: Array[Int], - private var numIterations: Int, - private var stepSize: Double, - private var miniBatchFraction: Double) + topology: Array[Int], + maxNumIterations: Int, + convergenceTol: Double) extends Serializable { - private val gradient = new ANNLeastSquaresGradient(topology) - private val updater = new ANNUpdater() - private val optimizer = new GradientDescent(gradient, updater) - .setStepSize(stepSize) - .setNumIterations(numIterations) - .setMiniBatchFraction(miniBatchFraction) + private var gradient: Gradient = new ANNLeastSquaresGradient(topology) + private var updater: Updater = new ANNUpdater() + private var optimizer: Optimizer = new LBFGS(gradient, updater). + setConvergenceTol( convergenceTol ). 
+ setMaxNumIterations(maxNumIterations) private def run(input: RDD[(Vector, Vector)], initialWeights: Vector): - ArtificialNeuralNetworkModel = { + ArtificialNeuralNetworkModel = { val data = input.map(v => (0.0, Vectors.fromBreeze(DenseVector.vertcat( @@ -124,54 +158,60 @@ class ArtificialNeuralNetwork private( val weights = optimizer.optimize(data, initialWeights) new ArtificialNeuralNetworkModel(weights, topology) } + } object ArtificialNeuralNetwork { + var optimizer: Optimizer = null; + def train( input: RDD[(Vector, Vector)], topology: Array[Int], initialWeights: Vector, - numIterations: Int, - stepSize: Double, - miniBatchFraction: Double): ArtificialNeuralNetworkModel = { - new ArtificialNeuralNetwork(topology, numIterations, stepSize, miniBatchFraction) + maxNumIterations: Int): ArtificialNeuralNetworkModel = { + new ArtificialNeuralNetwork(topology, maxNumIterations, 1e-4) .run(input, initialWeights) } def train( - input: RDD[(Vector, Vector)], - topology: Array[Int], - initialWeights: Vector, - numIterations: Int, - stepSize: Double): ArtificialNeuralNetworkModel = { - new ArtificialNeuralNetwork(topology, numIterations, stepSize, 1.0).run(input, initialWeights) + input: RDD[(Vector,Vector)], + model: ArtificialNeuralNetworkModel, + maxNumIterations: Int): ArtificialNeuralNetworkModel = { + train(input, model.topology, model.weights, maxNumIterations) } def train( - input: RDD[(Vector,Vector)], - model: ArtificialNeuralNetworkModel, - numIterations: Int, - stepSize: Double): ArtificialNeuralNetworkModel = { - train(input, model.topology, model.weights, numIterations, stepSize) + input: RDD[(Vector, Vector)], + topology: Array[Int], + maxNumIterations: Int): ArtificialNeuralNetworkModel = { + train(input, topology, randomWeights(topology), maxNumIterations) } def train( input: RDD[(Vector, Vector)], topology: Array[Int], - numIterations: Int, - stepSize: Double, - miniBatchFraction: Double): ArtificialNeuralNetworkModel = { - new ArtificialNeuralNetwork(topology, numIterations, stepSize, miniBatchFraction) - .run(input, randomWeights(topology)) + initialWeights: Vector, + maxNumIterations: Int, + convergenceTol: Double): ArtificialNeuralNetworkModel = { + new ArtificialNeuralNetwork(topology, maxNumIterations, convergenceTol) + .run(input, initialWeights) + } + + def train( + input: RDD[(Vector,Vector)], + model: ArtificialNeuralNetworkModel, + maxNumIterations: Int, + convergenceTol: Double): ArtificialNeuralNetworkModel = { + train(input, model.topology, model.weights, maxNumIterations, convergenceTol) } def train( input: RDD[(Vector, Vector)], topology: Array[Int], - numIterations: Int, - stepSize: Double): ArtificialNeuralNetworkModel = { - train(input, topology, numIterations, stepSize, 1.0) + maxNumIterations: Int, + convergenceTol: Double): ArtificialNeuralNetworkModel = { + train(input, topology, randomWeights(topology), maxNumIterations, convergenceTol) } def randomWeights(topology: Array[Int]): Vector = { @@ -191,28 +231,30 @@ object ArtificialNeuralNetwork { } val initialWeightsArr = new Array[Double](noWeights) - var pos = 0 + var pos = 0; l = 1 while( l < topology.length) { i = 0 while(i < (topology(l) * (topology(l - 1) + 1))) { initialWeightsArr(pos) = (rand.nextDouble * 4.8 - 2.4) / (topology(l - 1) + 1) - pos += 1 + pos += 1; i += 1 } l += 1 } Vectors.dense(initialWeightsArr) } + } -private[ann] trait ANNHelper { - protected val topology: Array[Int] - protected def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) - protected val L = topology.length - 1 +private class 
ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { - protected val noWeights = { + private def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) + + private val L = topology.length - 1 + + private val noWeights = { var tmp = 0 var l = 1 while(l <= L) { @@ -222,20 +264,20 @@ private[ann] trait ANNHelper { tmp } - protected val ofsWeight: Array[Int] = { + val ofsWeight: Array[Int] = { val tmp = new Array[Int](L + 1) - var curPos = 0 - tmp(0) = 0 + var curPos = 0; + tmp(0) = 0; var l = 1 while(l <= L) { tmp(l) = curPos - curPos = curPos + (topology(l - 1) + 1) * topology(l) + curPos = curPos + (topology(l - 1) + 1) * (topology(l)) l += 1 } tmp } - protected val noNodes: Int = { + val noNodes: Int = { var tmp: Integer = 0 var l = 0 while(l < topology.size) { @@ -245,7 +287,7 @@ private[ann] trait ANNHelper { tmp } - protected val ofsNode: Array[Int] = { + val ofsNode: Array[Int] = { val tmp = new Array[Int](L + 1) tmp(0) = 0 var l = 1 @@ -256,12 +298,17 @@ private[ann] trait ANNHelper { tmp } - protected def forwardRun(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { + override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { + val arrData = data.toArray + val arrWeights = weights.toArray val arrNodes = new Array[Double](noNodes) + var i: Int = 0 var j: Int = 0 var l: Int = 0 - i = 0 + + // forward run + i = 0; while(i < topology(0)) { arrNodes(i) = arrData(i) i += 1 @@ -270,7 +317,7 @@ private[ann] trait ANNHelper { while( l <= L ) { j = 0 while(j < topology(l)) { - var cum: Double = 0.0 + var cum: Double = 0.0; i = 0 while(i < topology(l - 1)) { cum = cum + @@ -284,29 +331,14 @@ private[ann] trait ANNHelper { } l += 1 } - arrNodes - } -} - -private class ANNLeastSquaresGradient(val topology: Array[Int]) extends Gradient with ANNHelper { - - override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { - val arrData = data.toArray - val arrWeights = weights.toArray - - var i: Int = 0 - var j: Int = 0 - var l: Int = 0 - // forward run - val arrNodes = forwardRun(arrData, arrWeights) val arrDiff = new Array[Double](topology(L)) j = 0 while( j < topology(L)) { - arrDiff(j) = arrNodes(ofsNode(L) + j) - arrData(topology(0) + j) + arrDiff(j) = (arrNodes(ofsNode(L) + j) - arrData(topology(0) + j)) j += 1 } - var err: Double = 0 + var err: Double = 0; j = 0 while(j < topology(L)) { err = err + arrDiff(j) * arrDiff(j) @@ -369,7 +401,7 @@ private class ANNLeastSquaresGradient(val topology: Array[Int]) extends Gradient cumGradient: Vector): Double = { val (grad, err) = compute(data, label, weights) cumGradient.toBreeze += grad.toBreeze - err + return err } } From 5a3531b14a7c798ce7438249f2efb6cdd744d666 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Mon, 22 Sep 2014 16:23:51 +0800 Subject: [PATCH 108/143] Update ANNSuite.scala Update due to optimiser change. 
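The only adaptation needed in the suite is the call site: the SGD-based trainer took an iteration count, a step size and a mini-batch fraction, whereas the LBFGS-based trainer from the previous commit takes a maximum number of iterations and a convergence tolerance. Side by side, with the values as they appear in the diff below:

// before, gradient descent: numIterations = 2000, stepSize = 2.0, miniBatchFraction = 1.0
val sgdModel = ArtificialNeuralNetwork.train(rddData, topology, 2000, 2.0, 1.0)
// after, LBFGS: maxNumIterations = 100, convergenceTol = 1e-5
val lbfgsModel = ArtificialNeuralNetwork.train(rddData, topology, 100, 1e-5)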
--- mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala index ddf3ff9009ec4..76d0e3e9da7be 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala @@ -39,7 +39,7 @@ class ANNSuite extends FunSuite with LocalSparkContext { (Vectors.dense(features), Vectors.dense(Array(label)))} val rddData = sc.parallelize(data, 2) val topology = Array[Int](inputSize, hiddenSize, outputSize) - val model = ArtificialNeuralNetwork.train(rddData, topology, 2000, 2.0, 1.0) + val model = ArtificialNeuralNetwork.train(rddData, topology, 100, 1e-5) val predictionAndLabels = rddData.map { case(input, label) => (model.predictV(input)(0), label(0)) }.collect() assert(predictionAndLabels.forall { case(p, l) => (math.round(p) - l) == 0 }) From 85050bafbf99d9cf22dc06a772853cddd7746e4f Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Mon, 22 Sep 2014 16:25:08 +0800 Subject: [PATCH 109/143] Delete TestANN.scala Current version makes no sense with fast LBFGS algorithm. Adapted version moved to examples. --- .../org/apache/spark/mllib/ann/TestANN.scala | 581 ------------------ 1 file changed, 581 deletions(-) delete mode 100644 mllib/src/test/scala/org/apache/spark/mllib/ann/TestANN.scala diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestANN.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/TestANN.scala deleted file mode 100644 index 9e6f59df3a11e..0000000000000 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/TestANN.scala +++ /dev/null @@ -1,581 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License.
- */ - - -package org.apache.spark.mllib.ann - -import java.awt._ -import java.awt.event._ -import java.text.SimpleDateFormat -import java.util.Calendar -import org.apache.spark._ -import org.apache.spark.mllib.ann._ -import org.apache.spark.mllib.linalg._ -import org.apache.spark.mllib.regression._ -import org.apache.spark.rdd.RDD -import scala.Array.canBuildFrom -import scala.util.Random - -object windowAdapter extends WindowAdapter { - - override def windowClosing( e: WindowEvent ) { - System.exit(0) - } - -} - -class OutputCanvas2D( wd: Int, ht: Int ) extends Canvas { - - var points: Array[Vector] = null - var approxPoints: Array[Vector] = null - - /* input: rdd of (x,y) vectors */ - def setData( rdd: RDD[Vector] ) { - points = rdd.collect - repaint - } - - def setApproxPoints( rdd: RDD[Vector] ) { - approxPoints = rdd.collect - repaint - } - - def plotDot( g: Graphics, x: Int, y: Int ) { - val r = 5 - val noSamp = 6*r - var x1 = x - var y1 = y + r - for( j <- 1 to noSamp ) { - val x2 = (x.toDouble + math.sin( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt - val y2 = (y.toDouble + math.cos( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt - g.drawLine( x1, ht - y1, x2, ht - y2 ) - x1 = x2 - y1 = y2 - } - } - - override def paint( g: Graphics) = { - - var xmax: Double = 0.0 - var xmin: Double = 0.0 - var ymax: Double = 0.0 - var ymin: Double = 0.0 - - if( points!=null ) { - - g.setColor( Color.black ) - val x = points.map( T => (T.toArray)(0) ) - val y = points.map( T => (T.toArray)(1) ) - - xmax = x.max - xmin = x.min - ymax = y.max - ymin = y.min - - for( i <- 0 to x.size - 1 ) { - - val xr = (((x(i).toDouble - xmin)/(xmax - xmin))*wd + .5).toInt - val yr = (((y(i).toDouble - ymin)/(ymax - ymin))*ht + .5).toInt - plotDot( g, xr, yr ) - - } - - if( approxPoints != null ) { - - g.setColor( Color.red ) - val x = approxPoints.map( T => (T.toArray)(0) ) - val y = approxPoints.map( T => (T.toArray)(1) ) - - for( i <- 0 to x.size-1 ) { - val xr = (((x(i).toDouble - xmin)/(xmax - xmin))*wd + .5).toInt - val yr = (((y(i).toDouble - ymin)/(ymax - ymin))*ht + .5).toInt - plotDot( g, xr, yr ) - } - - } - - } - - } - -} - -class OutputFrame2D( title: String ) extends Frame( title ) { - - val wd = 800 - val ht = 600 - - var outputCanvas = new OutputCanvas2D( wd, ht ) - - def apply() { - addWindowListener( windowAdapter ) - setSize( wd, ht ) - add( "Center", outputCanvas ) - show() - } - - def setData( rdd: RDD[Vector] ) { - outputCanvas.setData( rdd ) - } - - def setApproxPoints( rdd: RDD[Vector] ) { - outputCanvas.setApproxPoints( rdd ) - } - - -} - -object windowAdapter3D extends WindowAdapter { - - override def windowClosing( e: WindowEvent ) { - System.exit(0) - } - -} - -class OutputCanvas3D( wd: Int, ht: Int, shadowFrac: Double ) extends Canvas { - - var angle: Double = 0 - var points: Array[Vector] = null - var approxPoints: Array[Vector] = null - - /* 3 dimensional (x,y,z) vector */ - def setData( rdd: RDD[Vector] ) { - points = rdd.collect - repaint - } - - def setApproxPoints( rdd: RDD[Vector] ) { - approxPoints = rdd.collect - repaint - } - - def plotDot( g: Graphics, x: Int, y: Int ) { - val r = 5 - val noSamp = 6*r - var x1 = x - var y1 = y + r - for( j <- 1 to noSamp ) { - val x2 = (x.toDouble + math.sin( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt - val y2 = (y.toDouble + math.cos( j.toDouble*2*math.Pi/noSamp )*r + .5).toInt - g.drawLine( x1, ht - y1, x2, ht - y2 ) - x1 = x2 - y1 = y2 - } - } - - def plotLine( g: Graphics, x1: Int, y1: Int, x2: Int, y2: Int ) { - g.drawLine( x1, ht - y1, x2, 
ht - y2 ) - } - - def calcCord( arr: Array[Double], angle: Double ): (Double, Double, Double, Double, Double, Double) = { - - var arrOut = new Array[Double](6) - - val x = arr(0)*math.cos( angle ) - arr(1)*math.sin( angle ) - val y = arr(0)*math.sin( angle ) + arr(1)*math.cos( angle ) - val z = arr(2) - - val x0 = arr(0)*math.cos( angle ) - arr(1)*math.sin( angle ) - val y0 = arr(0)*math.sin( angle ) + arr(1)*math.cos( angle ) - val z0 = 0 - - val xs = (arr(0) + shadowFrac*arr(2))*math.cos( angle ) - arr(1)*math.sin( angle ) - val ys = (arr(0) + shadowFrac*arr(2))*math.sin( angle ) + arr(1)*math.cos( angle ) - val zs = 0 - - arrOut(0) = y - .5*x - arrOut(1) = z - .25*x - - arrOut(2) = y0 - .5*x0 - arrOut(3) = z0 - .25*x0 - - arrOut(4) = ys - .5*xs - arrOut(5) = zs - .25*xs - - ( arrOut(0), arrOut(1), arrOut(2), arrOut(3), arrOut(4), arrOut(5) ) - - } - - override def paint( g: Graphics) = { - - if( points!=null ) { - - var p = points.map( T => calcCord( T.toArray, angle ) ).toArray - - var xmax = p(0)._1 - var xmin = p(0)._1 - var ymax = p(0)._2 - var ymin = p(0)._2 - - for( i <- 0 to p.size-1 ) { - - if( xmax<p(i)._1 ) { - xmax = p(i)._1 - } - if( xmax<p(i)._3 ) { - xmax = p(i)._3 - } - if( xmax<p(i)._5 ) { - xmax = p(i)._5 - } - - if( xmin>p(i)._1 ) { - xmin = p(i)._1 - } - if( xmin>p(i)._3 ) { - xmin = p(i)._3 - } - if( xmin>p(i)._5 ) { - xmin = p(i)._5 - } - - if( ymax<p(i)._2 ) { - ymax = p(i)._2 - } - if( ymax<p(i)._4 ) { - ymax = p(i)._4 - } - if( ymax<p(i)._6 ) { - ymax = p(i)._6 - } - - if( ymin>p(i)._2 ) { - ymin = p(i)._2 - } - if( ymin>p(i)._4 ) { - ymin = p(i)._4 - } - if( ymin>p(i)._6 ) { - ymin = p(i)._6 - } - - } - - for( i <- 0 to p.size-1 ) { - - var x_ = (((p(i)._1 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt - var y_ = (((p(i)._2 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt - var x0 = (((p(i)._3 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt - var y0 = (((p(i)._4 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt - var xs = (((p(i)._5 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt - var ys = (((p(i)._6 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt - - g.setColor( Color.black ) - plotDot( g, x_, y_ ) - plotLine( g, x_, y_, x0, y0 ) - g.setColor( Color.gray ) - plotLine( g, x0, y0, xs, ys ) - - } - - if( approxPoints != null ) { - - var p = approxPoints.map( T => calcCord( T.toArray, angle ) ) - - for( i <- 0 to p.size-1 ) { - - var x_ = (((p(i)._1 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt - var y_ = (((p(i)._2 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt - var x0 = (((p(i)._3 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt - var y0 = (((p(i)._4 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt - var xs = (((p(i)._5 - xmin)/(xmax - xmin))*(wd - 40) + 20.5).toInt - var ys = (((p(i)._6 - ymin)/(ymax - ymin))*(ht - 40) + 20.5).toInt - - g.setColor( Color.red ) - plotDot( g, x_, y_ ) - plotLine( g, x_, y_, x0, y0 ) - g.setColor( Color.magenta ) - plotLine( g, x0, y0, xs, ys ) - - } - - } - - } - } -} - -class OutputFrame3D( title: String, shadowFrac: Double ) extends Frame( title ) { - - val wd = 800 - val ht = 600 - - def this( title: String ) = this( title, .25 ) - - var outputCanvas = new OutputCanvas3D( wd, ht, shadowFrac ) - - def apply() { - addWindowListener( windowAdapter3D ) - setSize( wd, ht ) - add( "Center", outputCanvas ) - show() - } - - def setData( rdd: RDD[Vector] ) { - outputCanvas.setData( rdd ) - } - - def setAngle( angle: Double ) { - outputCanvas.angle = angle - } - - def setApproxPoints( rdd: RDD[Vector] ) { - outputCanvas.setApproxPoints( rdd ) - } - -} - -object TestANN { - - var rand = new Random( 0 ) - - def generateInput2D( f: Double => Double, xmin: Double, xmax: Double, noPoints: Int ): Array[(Vector,Vector)] = - { - - var out = new Array[(Vector,Vector)](noPoints) - - for( i <- 0 to noPoints - 1 ) { - val
x = xmin + rand.nextDouble()*(xmax - xmin) - val y = f(x) - out(i) = ( Vectors.dense( x ), Vectors.dense( y ) ) - } - - return out - - } - - - def generateInput3D( f: (Double,Double) => Double, xmin: Double, xmax: Double, ymin: Double, ymax: Double, noPoints: Int ): Array[(Vector,Vector)] = { - - var out = new Array[(Vector,Vector)](noPoints) - - for( i <- 0 to noPoints - 1 ) { - - val x = xmin + rand.nextDouble()*(xmax - xmin) - val y = ymin + rand.nextDouble()*(ymax - ymin) - val z = f( x, y ) - - var arr = new Array[Double](2) - - arr(0) = x - arr(1) = y - out(i) = ( Vectors.dense( arr ), Vectors.dense( z ) ) - - } - - out - - } - - def generateInput4D( f: Double => (Double,Double,Double), tmin: Double, tmax: Double, noPoints: Int ): Array[(Vector,Vector)] = { - - var out = new Array[(Vector,Vector)](noPoints) - - for( i <- 0 to noPoints - 1 ) { - - val t: Double = tmin + rand.nextDouble()*(tmax - tmin) - var arr = new Array[Double](3) - var F = f(t) - - arr(0) = F._1 - arr(1) = F._2 - arr(2) = F._3 - - out(i) = ( Vectors.dense( t ), Vectors.dense( arr ) ) - } - - out - - } - - def f( T: Double ): Double = { - val y = 0.5 + Math.abs(T/5).toInt.toDouble*.15 + math.sin(T*math.Pi/10)*.1 - assert( y<= 1) - y - } - - def f3D( x: Double, y: Double ): Double = { - .5 + .24*Math.sin( x*2*math.Pi/10 ) + .24*Math.cos( y*2*math.Pi/10 ) - } - - def f4D( t: Double ): (Double, Double,Double) = { - val x = Math.abs(.8*Math.cos( t*2*math.Pi/20 ) ) + .1 - val y = (11 + t)/22 - val z = .5 + .35*Math.sin(t*2*math.Pi/5)*Math.cos( t*2*math.Pi/10 ) + .15*t/11 - ( x, y, z ) - } - - def concat( v1: Vector, v2: Vector ): Vector = { - - var a1 = v1.toArray - var a2 = v2.toArray - var a3 = new Array[Double]( a1.size + a2.size ) - - for( i <- 0 to a1.size - 1 ) { - a3(i) = a1(i) - } - - for( i <- 0 to a2.size - 1 ) { - a3(i + a1.size) = a2(i) - } - - Vectors.dense( a3 ) - - } - - def main( arg: Array[String] ) { - - println( "ANN tester" ) - println - - val formatter = new SimpleDateFormat("hh:mm:ss") - - var curAngle: Double = 0.0 - var graphic: Boolean = false - - if( (arg.length>0) && (arg(0)=="graph" ) ) { - graphic = true - } - - var outputFrame2D: OutputFrame2D = null - var outputFrame3D: OutputFrame3D = null - var outputFrame4D: OutputFrame3D = null - - if( graphic ) { - - outputFrame2D = new OutputFrame2D( "x -> y" ) - outputFrame2D.apply - - outputFrame3D = new OutputFrame3D( "(x,y) -> z", 1 ) - outputFrame3D.apply - - outputFrame4D = new OutputFrame3D( "t -> (x,y,z)" ) - outputFrame4D.apply - - } - - var A = 20.0 - var B = 50.0 - - var conf = new SparkConf().setAppName("Parallel ANN").setMaster("local[1]") - var sc = new SparkContext(conf) - - val testRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache - val testRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 200 ), 2).cache - val testRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache - - val validationRDD2D = sc.parallelize( generateInput2D( T => f(T), -10, 10, 100 ), 2).cache - val validationRDD3D = sc.parallelize( generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache - val validationRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache - - if( graphic ) { - - outputFrame2D.setData( testRDD2D.map( T => concat( T._1, T._2 ) ) ) - outputFrame3D.setData( testRDD3D.map( T => concat( T._1, T._2 ) ) ) - outputFrame4D.setData( testRDD4D.map( T => T._2 ) ) - - } - - val starttime = Calendar.getInstance().getTime() - println( "Start 
training " + starttime ) - - var model2D = ArtificialNeuralNetwork.train( testRDD2D, Array[Int](1, 3, 3, 1), 1000, 1.0) - var model3D = ArtificialNeuralNetwork.train( testRDD3D, Array[Int](2, 20, 1), 1000, 1.0) - var model4D = ArtificialNeuralNetwork.train( testRDD4D, Array[Int](1, 20, 3), 1000, 1.0 ) - - val noIt = 1500 - var errHist = new Array[(Int,Double,Double,Double)]( noIt ) - - for( i <- 0 to noIt - 1 ) { - - val predictedAndTarget2D = validationRDD2D.map( T => ( T._1, T._2, model2D.predictV( T._1 ) ) ) - val predictedAndTarget3D = validationRDD3D.map( T => ( T._1, T._2, model3D.predictV( T._1 ) ) ) - val predictedAndTarget4D = validationRDD4D.map( T => ( T._1, T._2, model4D.predictV( T._1 ) ) ) - - var err2D = predictedAndTarget2D.map( T => - (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) - ).reduce( (u,v) => u + v ) - - var err3D = predictedAndTarget3D.map( T => - (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) - ).reduce( (u,v) => u + v ) - - var err4D = predictedAndTarget4D.map( T => { - - val v1 = T._2.toArray - val v2 = T._3.toArray - - (v1(0) - v2(0))*(v1(0) - v2(0)) + - (v1(1) - v2(1))*(v1(1) - v2(1)) + - (v1(2) - v2(2))*(v1(2) - v2(2)) - - } ).reduce( (u,v) => u + v ) - - - if( graphic ) { - - val predicted2D = predictedAndTarget2D.map( - T => concat( T._1, T._3 ) - ) - - val predicted3D = predictedAndTarget3D.map( - T => concat( T._1, T._3 ) - ) - - val predicted4D = predictedAndTarget4D.map( - T => T._3 - ) - - curAngle = curAngle + math.Pi/4 - if( curAngle>=2*math.Pi ) { - curAngle = curAngle - 2*math.Pi - } - - outputFrame3D.setAngle( curAngle ) - outputFrame4D.setAngle( curAngle ) - - outputFrame2D.setApproxPoints( predicted2D ) - outputFrame3D.setApproxPoints( predicted3D ) - outputFrame4D.setApproxPoints( predicted4D ) - - } - - println( "It. "+i+" ("+Calendar.getInstance().getTime()+"), Error 2D/3D/4D: " + (err2D, err3D, err4D) ) - errHist(i) = ( i, err2D, err3D, err4D ) - - if( i < noIt - 1 ) { - model2D = ArtificialNeuralNetwork.train(testRDD2D, model2D, 1000, 1.0) - model3D = ArtificialNeuralNetwork.train(testRDD3D, model3D, 1000, 1.0) - model4D = ArtificialNeuralNetwork.train(testRDD4D, model4D, 1000, 1.0) - } - - } - - sc.stop - - val stoptime = Calendar.getInstance().getTime() - - for( i <- 0 to noIt - 1 ) { - println( errHist(i) ) - } - - println( formatter.format( starttime )+"-" + formatter.format( stoptime ) + " "+(stoptime.getTime-starttime.getTime+500)/1000+" seconds" ) - - } - -} From 5f513052f5687d260dc747378439f244443fc8b8 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Mon, 22 Sep 2014 16:27:35 +0800 Subject: [PATCH 110/143] Create ANNDemo.scala Demo of ANN with LBFGS. Can consider whether we want to keep this, as there is much overhead due to the graphical representation. 
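For orientation, the demo's core flow per target function is train-then-predict; a minimal sketch (generateInput2D, f, sc and the topology array are names taken from the demo itself, while the point count, iteration count and tolerance simply mirror the values used in this patch):

```
// Generate 100 (x, f(x)) training pairs with random x in [-10, 10], in 2 partitions.
val testRDD2D = sc.parallelize(generateInput2D(T => f(T), -10, 10, 100), 2).cache
// Train an ANN with 1 input node, hidden layers of 5 and 3 nodes, and 1 output node.
var model2D = ArtificialNeuralNetwork.train(testRDD2D, Array[Int](1, 5, 3, 1), 1000, 1e-8)
// predictV returns the complete output vector for a single input vector.
val predicted2D = testRDD2D.map(T => (T._1, model2D.predictV(T._1)))
```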
--- .../org/apache/spark/examples/ANNDemo.scala | 20 +++++++++---------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/examples/src/main/scala/org/apache/spark/examples/ANNDemo.scala b/examples/src/main/scala/org/apache/spark/examples/ANNDemo.scala index dd981f90e9cff..14e9a2d61cb10 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ANNDemo.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ANNDemo.scala @@ -21,13 +21,11 @@ import java.awt._ import java.awt.event._ import java.text.SimpleDateFormat import java.util.Calendar - import org.apache.spark._ import org.apache.spark.mllib.ann._ import org.apache.spark.mllib.linalg._ import org.apache.spark.mllib.regression._ import org.apache.spark.rdd.RDD - import scala.Array.canBuildFrom import scala.util.Random @@ -369,7 +367,7 @@ object ANNDemo { } - def generateInput3D(f: (Double,Double) => Double, xmin: Double, xmax: Double, + def generateInput3D(f: (Double,Double) => Double, xmin: Double, xmax: Double, ymin: Double, ymax: Double, noPoints: Int): Array[(Vector,Vector)] = { var out = new Array[(Vector,Vector)](noPoints) @@ -486,7 +484,7 @@ object ANNDemo { val validationRDD2D = sc.parallelize(generateInput2D( T => f(T), -10, 10, 100 ), 2).cache - val validationRDD3D = + val validationRDD3D = sc.parallelize(generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache val validationRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache @@ -497,25 +495,25 @@ object ANNDemo { var starttime = Calendar.getInstance().getTime() println("Training 2D") - var model2D = ArtificialNeuralNetwork.train(testRDD2D, Array[Int](5, 3), 1000, 1e-8) + var model2D = ArtificialNeuralNetwork.train(testRDD2D, Array[Int](1, 5, 3, 1), 1000, 1e-8) var stoptime = Calendar.getInstance().getTime() println(((stoptime.getTime-starttime.getTime + 500) / 1000) + "s") starttime = stoptime println("Training 3D") - var model3D = ArtificialNeuralNetwork.train(testRDD3D, Array[Int](20), 1000, 1e-8) + var model3D = ArtificialNeuralNetwork.train(testRDD3D, Array[Int](2, 20, 1), 1000, 1e-8) stoptime = Calendar.getInstance().getTime() println(((stoptime.getTime-starttime.getTime + 500) / 1000) + "s") starttime = stoptime println("Training 4D") - var model4D = ArtificialNeuralNetwork.train(testRDD4D, Array[Int](20), 1000, 1e-8) + var model4D = ArtificialNeuralNetwork.train(testRDD4D, Array[Int](1, 20, 3), 1000, 1e-8) stoptime = Calendar.getInstance().getTime() println(((stoptime.getTime-starttime.getTime + 500) / 1000) + "s") - val predictedAndTarget2D = validationRDD2D.map(T => (T._1, T._2, model2D.predict(T._1))) - val predictedAndTarget3D = validationRDD3D.map(T => (T._1, T._2, model3D.predict(T._1))) - val predictedAndTarget4D = validationRDD4D.map(T => (T._1, T._2, model4D.predict(T._1))) + val predictedAndTarget2D = validationRDD2D.map(T => (T._1, T._2, model2D.predictV(T._1))) + val predictedAndTarget3D = validationRDD3D.map(T => (T._1, T._2, model3D.predictV(T._1))) + val predictedAndTarget4D = validationRDD4D.map(T => (T._1, T._2, model4D.predictV(T._1))) var err2D = predictedAndTarget2D.map( T => (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) @@ -556,7 +554,7 @@ object ANNDemo { while(true) { // stops when closing the window - curAngle = curAngle + math.Pi/8 + curAngle = curAngle + math.Pi/4 if(curAngle >= 2*math.Pi) { curAngle = curAngle - 2*math.Pi } From 95ed2a2115163e99eec76b9bf2385e5225e603c6 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Mon, 22 Sep 2014 16:28:56 
+0800 Subject: [PATCH 111/143] Update mllib-ann.md Update with back-propagation and LBFGS. --- docs/mllib-ann.md | 243 +++++++++++++++++++++++----------------------- 1 file changed, 123 insertions(+), 120 deletions(-) diff --git a/docs/mllib-ann.md b/docs/mllib-ann.md index 005a5be4987d9..4a1acd5f380da 100644 --- a/docs/mllib-ann.md +++ b/docs/mllib-ann.md @@ -10,167 +10,170 @@ This document describes the MLlib's Artificial Neural Network (ANN) implementati The implementation currently consist of the following files: -* 'ParallelANN.scala': implements the ANN -* 'GeneralizedSteepestDescentAlgorithm.scala': provides an abstract class and model as basis for 'ParallelANN'. - -In addition, there is a demo/test available: - -* 'TestParallelANN.scala': tests parallel ANNs for various functions -* 'TestParallelANNgraphics.scala': graphical output for 'TestParallelANN.scala' +* 'ArtificialNeuralNetwork.scala': implements the ANN +* 'ANNSuite': implements automated tests for the ANN and its gradient +* 'ANNDemo': a demo that approximates three functions and shows a graphical representation of +the result # Architecture and Notation -The file ParallelANN.scala implements a three-layer ANN with the following architecture: +The file ArtificialNeuralNetwork.scala implements the ANN. The following picture shows the +architecture of a 3-layer ANN: ``` +-------+ | | - | X_0 | + | N_0,0 | | | - +-------+ +-------+ - | | - +-------+ | H_0 | +-------+ - | | | | | | - | X_1 |- +-------+ ->| O_0 | - | | \ Vij / | | - +-------+ - +-------+ - +-------+ - \ | | / Wjk - : ->| H_1 |- +-------+ - : | | | | - : +-------+ | O_1 | - : | | - : : +-------+ - : : - : : : - : : - : : +-------+ - : : | | - : : | O_K-1 | - : | | - : +-------+ +-------+ - : | | - : | H_J-1 | - | | - +-------+ +-------+ + +-------+ +-------+ + | | + +-------+ | N_0,1 | +-------+ + | | | | | | + | N_1,0 |- +-------+ ->| N_0,2 | + | | \ Wij1 / | | + +-------+ -- +-------+ -- +-------+ + \ | | / Wjk2 + : ->| N_1,1 |- +-------+ + : | | | | + : +-------+ | N_1,2 | + : | | + : : +-------+ + : : + : : : + : : + : : +-------+ + : : | | + : : |N_K-1,2| + : | | + : +-------+ +-------+ + : | | + : |N_J-1,1| + | | + +-------+ +-------+ | | - | X_I-1 | + |N_I-1,0| | | +-------+ - +-------+ +--------+ - | | | | - | -1 | | -1 | - | | | | - +-------+ +--------+ + +-------+ +--------+ + | | | | + | -1 | | -1 | + | | | | + +-------+ +--------+ -INPUT LAYER HIDDEN LAYER OUTPUT LAYER +INPUT LAYER HIDDEN LAYER OUTPUT LAYER ``` -The nodes X_0 to X_{I-1} are the I input nodes. The nodes H_0 to H_{J-1} are the J hidden nodes and the nodes O_0 to O_{K-1} are the K output nodes. Between each input node X_i and hidden node H_j there is a weight V_{ij}. Likewise, between each hidden node H_j and each output node O_k is a weight W_{jk}. - -The ANN also implements two bias units. These are nodes that always output the value -1. The bias units are in the input and in the hidden layer. They act as normal nodes, except that the bias unit in the hidden layer has no input. The bias units can also be denoted by X_I and H_J. +The i-th node in layer l is denoted by N_{i,l}, both i and l starting with 0. The weight +between node i in layer l-1 and node j in layer l is denoted by Wijl. Layer 0 is the input +layer, whereas layer L is the output layer. -The value of a hidden node H_j is calculated as follows: +The ANN also implements bias units. These are nodes that always output the value -1. The bias +units are in all layers except the output layer. 
They act similar to other nodes, but do not +have input. -`$H_j = g ( \sum_{i=0}^{I} X_i*V_{i,j} )$` +The value of node N_{j,l} is calculated as follows: -Likewise, the value of the output node O_k is calculated as follows: - -`$O_k = g( \sum_{j=0}^{J} H_j*W_{j,k} )$` +`$N_{j,l} = g( \sum_{i=0}^{topology_{l-1}} W_{i,j,l}*N_{i,l-1} )$` Where g is the sigmod function `$g(t) = \frac{e^{\beta t} }{1+e^{\beta t}}$` -and `$\beta$` defines the steepness of g. - -# Gradient descent - -Currently, the MLlib uses gradent descent for training. This means that the weights V_{ij} and W_{jk} are updated by adding a fraction of the gradient to V_{ij} and W_{jk} of the following function: - -`$E = \sum_{k=0}^{K-1} (O_k - Y_k )^2$` - -where Y_k is the target output given inputs X_0 ... X_{I-1} - -Calculations provide that: +# LBFGS -`$\frac{\partial E}{\partial W_{jk}} = 2 (O_k-Y_k) \cdot H_j \cdot g' \left( \sum_{m=0}^{J} W_{mk} H_m \right)$` +MLlib uses the LBFGS algorithm for training. It minimises the following error function: -and +`$E = \sum_{k=0}^{K-1} (N_{k,L} - Y_k )^2$` -`$\frac{\partial E}{\partial V_{ij}} = 2 \sum_{k=0}^{K-1} \left( (O_k - Y_k) \cdot X_i \cdot W_{jk} \cdot g'\left( \sum_{n=0}^{J} W_{nk} H_n \right) g'\left( \sum_{m=0}^{I} V_{mj} X_i \right) \right)$` - -The training step consists of the two operations - -`$V_{ij} = V_{ij} - \epsilon \frac{\partial E}{\partial V_{ij}}$` - -and - -`$W_{jk} = W_{jk} - \epsilon \frac{\partial E}{\partial W_{jk}}$` - -where `$\epsilon$` is the step size. +where Y_k is the target output given inputs N_{0,0} ... N_{I-1,0}. # Implementation Details -## The 'ParallelANN' class - -The 'ParallelANN' class is the main class of the ANN. This class uses a trait 'ANN', which includes functions for calculating the hidden layer ('computeHidden') and calculation of the output ('computeValues'). The output of 'computeHidden' includes the bias node in the hidden layer, such that it does not need to handle the hidden bias node differently. - -The 'ParallelANN' class has the following constructors: - -* `ParallelANN( stepSize, numIterations, miniBatchFraction, noInput, noHidden, noOutput, beta )` -* `ParallelANN()`: assumes 'stepSize'=1.0, 'numIterations'=100, 'miniBatchFraction'=1.0, 'noInput'=1, 'noHidden'=5, noOutput'=1, 'beta'=1.0. -* `ParallelANN( noHidden )`: as 'ParallelANN()', but allows specification of 'noHidden' -* `ParallelANN( noInput, noHidden )`: as 'ParallelANN()', but allows specification of number of 'noInput' and 'noHidden' -* `ParallelANN( noInput, noHidden, noOutput )`: as 'ParallelANN()', but allows specification of 'noInput', 'noHidden' and 'noOutput' - -The number of input nodes I is stored in the variable 'noInput', the number of hidden nodes J is stored in 'noHidden' and the number of output nodes K is stored in 'noOutput'. 'beta' contains the value of `$\beta$` for the sigmoid function. - -The parameters 'stepSize', 'numIterations' and 'miniBatchFraction' are of use for the Statistical Gradient Descent function. - -In addition, it has a single vector 'weights' corresponding to V_{ij} and W_{jk}. The mapping of V_{ij} and W_{jk} into 'weights' is as follows: - -* V_{ij} -> `weights[ i + j*(noInput+1) ]` -* W_{jk} -> `weights[ (noInput+1)*noHidden + j + k*(noHidden+1) ]` - -The training function carries the name 'train'. It can take various inputs: - -* `def train( rdd: RDD[(Vector,Vector)] )`: starts a complete new training session and generates a new ANN.
-* `def train( rdd: RDD[(Vector,Vector)], model: ParallelANNModel )`: continues a training session with an existing ANN. -* `def train( rdd: RDD[(Vector,Vector)], weights: Vector )`: starts a training session using initial weights as indicated by 'weights'. - -The input of the training function is an RDD with (input/output) training pairs, each input and output being stored as a 'Vector'. The training function returns a variable of from class 'ParallelANNModel', as described below. - -## The 'ParallelANNModel' class - -All information needed for the ANN is stored in the 'ParallelANNModel' class. The training function 'train' from 'ParallelANN' returns an object from the 'ParallelANNModel' class. - -The information in 'parallelANNModel' consist of the weights, the number of input, hidden and output nodes, as well as two functions 'predictPoint' and 'predictPointV'. - -The 'predictPoint' function is used to calculate a single output value as a 'Double'. If the output of the ANN actually is a vector, it returns just the first element of the vector, that is O_0. The output of the 'predictPointV' is of type 'Vector', and returns all K output values. - -## The 'GeneralizedSteepestDescentAlgorithm' class - -The 'GeneralizedSteepestDescendAlgorithm' class is based on the 'GeneralizedLinearAlgorithm' class. The main difference is that the 'GeneralizedSteepestDescentAlgorithm' is based on output values of type 'Vector', whereas 'GeneralizedLinearAlgorithm' is based of output values of type 'Double'. The new class was needed, because an ANN ideally outputs multiple values, hence a 'Vector'. +## The `ArtificialNeuralNetwork` class + +The `ArtificialNeuralNetwork` class has the following constructor: + +`class ArtificialNeuralNetwork private(topology: Array[Int], maxNumIterations: Int, +convergenceTol: Double)` + +* `topology` is an array of integers indicating then number of nodes per layer. For example, if +`topology` holds `(3, 5, 1)`, it means that there are three input nodes, five nodes in a single +hidden layer and 1 output node. +* `maxNumIterations` indicates the number of iterations after which the LBFGS algorithm must +have stopped. +* `convergenceTol` indicates the acceptable error, and if reached the LBFGS algorithm will +stop. A lower number of `convergenceTol` will give a higher precision. + +There is also an object `ArtificialNeuralNetwork`. This object contains the training function. +There are six different instances of the training function, each for use with different +parameters. All take as the first parameter the RDD `input`, which contains pairs of input and +output vectors. + +* `def train(input: RDD[(Vector, Vector)], topology: Array[Int], maxNumIterations: Int): +ArtificialNeuralNetworkModel`: starts training with random initial weights, and a default +`convergenceTol`=1e-5. +* `def train(input: RDD[(Vector, Vector)], topology: Array[Int], initialWeights: Vector, +maxNumIterations: Int): ArtificialNeuralNetworkModel`: starts training with given initial +weights, and a default `convergenceTol`=1e-5. +* `def train(input: RDD[(Vector, Vector)], model: ArtificialNeuralNetworkModel, +maxNumIterations: Int): ArtificialNeuralNetworkModel`: resumes training given an earlier +calculated model, and a default `convergenceTol`=1e-5. +* `def train(input: RDD[(Vector, Vector)], topology: Array[Int], maxNumIterations: Int, +convergenceTol: Double): ArtificialNeuralNetworkModel`: starts training with random initial +weights. Allows setting a customised `convergenceTol`. 
+* `def train(input: RDD[(Vector, Vector)], topology: Array[Int], initialWeights: Vector, +maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel`: starts training +with given initial weights. Allows setting a customised `convergenceTol`. +* `def train(input: RDD[(Vector, Vector)], model: ArtificialNeuralNetworkModel, +maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel`: resumes training +given an earlier calculated model. Allows setting a customised `convergenceTol`. + +All training functions return the trained ANN using the class `ArtificialNeuralNetworkModel`. +This class has the following function: + +* `predictV(testData: Vector): Vector` calculates the output vector given input vector +`testData`. + +The weights use dby `predictV` come from the model. ## Training -Science has provided many different strategies to train an ANN. Hence it is important that the optimising functions in MLlib's ANN are interchangeable. The ParallelANN class has a variable 'optimizer', which is currently set to a 'GradientDescent' optimising class. The 'GradientDescent' optimising class implements a stochastic gradient descent method, and is also used for other optimisation technologies in Spark. It is expected that other optimising functions will be defined for Spark, and these can be stored in the 'optimizer' variable. +We have chosen to implement the ANN with LBFGS as the optimiser function. We compared it with +Stochastic Gradient Descent: LBFGS was much faster, but correspondingly it also starts +overfitting earlier. + +Science has provided many different strategies to train an ANN. Hence it is important that the +optimising functions in MLlib's ANN are interchangeable. A new optimisation strategy can be +implemented by creating a new class descending from ArtificialNeuralNetwork, and replacing the +optimiser, updater and possibly gradient as required. -# Demo/test +# Demo and tests -Usage of MLlib's ANN is demonstrated through the 'TestParallelANN' demo program. The program generates three functions: +Usage of MLlib's ANN is demonstrated through the 'ANNDemo' demo program. The program generates +three functions: * f2d: x -> y * f3d: (x,y) -> z * f4d: t -> (x,y,z) -When the program is given the Java argument 'graph', it will show a graphical representation of the target function and the latest values. +It will calculate an approximation of the target function, and show a graphical representation +of the training set and the results after applying the testing set. + +In addition, there are the following tests: + +* "ANN learns XOR function": tests that the ANN can properly approximate an XOR function. +* "Gradient of ANN": tests that the output of the ANN gradient is roughly equal to an +approximated gradient. # Conclusion -The 'ParallelANN' class implements a Artificial Neural Network (ANN), using the stochastic gradient descent method. It takes as input an RDD of input/output values of type 'Vector', and returns an object of type 'ParallelANNModel' containing the parameters of the trained ANN. The 'ParallelANNModel' object can also be used to calculate results after training. +The 'AritificalNeuralNetwork' class implements a Artificial Neural Network (ANN), using the +LBFGS algorithm. It takes as input an RDD of input/output values of type 'Vector', and returns +an object of type 'ArtificialNeuralNetworkModel' containing the parameters of the trained ANN. +The 'ArtificialNeuralNetworkModel' object can also be used to calculate results after training.
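To make the above concrete, here is a minimal end-to-end sketch against the API described in this document (a sketch only: it assumes a SparkContext `sc` is in scope, and the quadratic target function, topology and iteration count are illustrative):

```
import org.apache.spark.mllib.ann.ArtificialNeuralNetwork
import org.apache.spark.mllib.linalg.{Vector, Vectors}

// 100 training pairs for the 1-dimensional target x -> x*x;
// outputs stay in [0,1], as the implementation requires.
val trainingRDD = sc.parallelize(
  Seq.tabulate(100) { i =>
    val x = i / 100.0
    (Vectors.dense(x), Vectors.dense(x * x))
  })
// topology: 1 input node, 5 hidden nodes, 1 output node.
val model = ArtificialNeuralNetwork.train(trainingRDD, Array(1, 5, 1), 1000)
// predictV returns the complete output vector for one input vector.
val out: Vector = model.predictV(Vectors.dense(0.3))
```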
-The training of an ANN can be interrupted and later continued, allowing intermediate inspection of the results. +The training of an ANN can be interrupted and later continued, allowing intermediate inspection +of the results. -A demo program for ANN is provided. +A demo program and tests for ANN are provided. From 4b83de4ca52be4810b254b6dbd031be33944f408 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Mon, 22 Sep 2014 16:30:08 +0800 Subject: [PATCH 112/143] Update mllib-ann.md Fixed sigmoid --- docs/mllib-ann.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/mllib-ann.md b/docs/mllib-ann.md index 4a1acd5f380da..ac42446276255 100644 --- a/docs/mllib-ann.md +++ b/docs/mllib-ann.md @@ -78,7 +78,7 @@ The value of node N_{j,l} is calculated as follows: Where g is the sigmod function -`$g(t) = \frac{e^{\beta t} }{1+e^{\beta t}}$` +`$g(t) = \frac{1}{1+e^{-t}}$` # LBFGS From 78283279e7419c3c52cf3289de94f8915de51812 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Tue, 23 Sep 2014 10:20:54 +0800 Subject: [PATCH 113/143] Update ArtificialNeuralNetwork.scala Fixed erroneously reverting ANNHelper trait --- .../mllib/ann/ArtificialNeuralNetwork.scala | 168 +++++++----------- 1 file changed, 64 insertions(+), 104 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index fe200f8c1aae2..d99e78363ae2e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -69,62 +69,17 @@ import org.apache.spark.util.random.XORShiftRandom */ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topology: Array[Int]) - extends Serializable { - - private val L = topology.length - 1 - - private val ofsWeight: Array[Int] = { - val tmp = new Array[Int](L + 1) - var curPos = 0 - tmp(0) = 0 - var l = 1 - while(l <= L) { - tmp(l) = curPos - curPos = curPos + (topology(l - 1) + 1) * (topology(l)) - l += 1 - } - tmp - } - - private def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) + extends Serializable with ANNHelper { def computeValues(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { - var arrPrev = new Array[Double](topology(0)) - var i: Int = 0 - var j: Int = 0 - var l: Int = 0 - i = 0 - while(i < topology(0)) { - arrPrev(i) = arrData(i) - i += 1 - } - l = 1 - while(l <= L) { - val arrCur = new Array[Double](topology(l)) - j = 0 - while(j < topology(l)) { - var cum: Double = 0.0 - i = 0 - while( i < topology(l - 1) ) { - cum = cum + - arrPrev(i) * arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + i) - i += 1 - } - cum = cum + - arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + topology(l - 1)) // bias - arrCur(j) = g(cum) - j += 1 - } - arrPrev = arrCur - l += 1 - } - arrPrev + val arrNodes = forwardRun(arrData, arrWeights) + arrNodes.slice(arrNodes.size - topology(L), arrNodes.size) } /** * Predict values for a single data point using the model trained. * - * @param testData Vector representing a single data point + * @param testData array representing a single data point * @return Vector prediction from the trained model * * Returns the complete vector. 
@@ -141,14 +96,14 @@ class ArtificialNeuralNetwork private( convergenceTol: Double) extends Serializable { - private var gradient: Gradient = new ANNLeastSquaresGradient(topology) - private var updater: Updater = new ANNUpdater() + private val gradient = new ANNLeastSquaresGradient(topology) + private val updater = new ANNUpdater() private var optimizer: Optimizer = new LBFGS(gradient, updater). - setConvergenceTol( convergenceTol ). - setMaxNumIterations(maxNumIterations) + setConvergenceTol(convergenceTol). + setNumIterations(maxNumIterations) private def run(input: RDD[(Vector, Vector)], initialWeights: Vector): - ArtificialNeuralNetworkModel = { + ArtificialNeuralNetworkModel = { val data = input.map(v => (0.0, Vectors.fromBreeze(DenseVector.vertcat( @@ -158,13 +113,10 @@ class ArtificialNeuralNetwork private( val weights = optimizer.optimize(data, initialWeights) new ArtificialNeuralNetworkModel(weights, topology) } - } object ArtificialNeuralNetwork { - var optimizer: Optimizer = null; - def train( input: RDD[(Vector, Vector)], topology: Array[Int], @@ -211,7 +163,7 @@ object ArtificialNeuralNetwork { topology: Array[Int], maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel = { - train(input, topology, randomWeights(topology), maxNumIterations, convergenceTol) + train(input, topology, randomWeights(topology), maxNumIterations, convergenceTol) } def randomWeights(topology: Array[Int]): Vector = { @@ -223,7 +175,7 @@ object ArtificialNeuralNetwork { val noWeights = { var tmp = 0 var i = 1 - while(i < topology.size) { + while (i < topology.size) { tmp = tmp + topology(i) * (topology(i - 1) + 1) i += 1 } @@ -231,95 +183,88 @@ object ArtificialNeuralNetwork { } val initialWeightsArr = new Array[Double](noWeights) - var pos = 0; + var pos = 0 l = 1 - while( l < topology.length) { + while (l < topology.length) { i = 0 - while(i < (topology(l) * (topology(l - 1) + 1))) { + while (i < (topology(l) * (topology(l - 1) + 1))) { initialWeightsArr(pos) = (rand.nextDouble * 4.8 - 2.4) / (topology(l - 1) + 1) - pos += 1; + pos += 1 i += 1 } l += 1 } Vectors.dense(initialWeightsArr) } - } -private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { +private[ann] trait ANNHelper { + protected val topology: Array[Int] + protected def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) + protected val L = topology.length - 1 - private def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) - - private val L = topology.length - 1 - - private val noWeights = { + protected val noWeights = { var tmp = 0 var l = 1 - while(l <= L) { + while (l <= L) { tmp = tmp + topology(l) * (topology(l - 1) + 1) l += 1 } tmp } - val ofsWeight: Array[Int] = { + protected val ofsWeight: Array[Int] = { val tmp = new Array[Int](L + 1) - var curPos = 0; - tmp(0) = 0; + var curPos = 0 + tmp(0) = 0 var l = 1 - while(l <= L) { + while (l <= L) { tmp(l) = curPos - curPos = curPos + (topology(l - 1) + 1) * (topology(l)) + curPos = curPos + (topology(l - 1) + 1) * topology(l) l += 1 } tmp } - val noNodes: Int = { + protected val noNodes: Int = { var tmp: Integer = 0 var l = 0 - while(l < topology.size) { + while (l < topology.size) { tmp = tmp + topology(l) l += 1 } tmp } - val ofsNode: Array[Int] = { + protected val ofsNode: Array[Int] = { val tmp = new Array[Int](L + 1) tmp(0) = 0 var l = 1 - while(l <= L) { + while (l <= L) { tmp(l) = tmp(l - 1) + topology(l - 1) l += 1 } tmp } - override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { - val arrData = data.toArray - val 
arrWeights = weights.toArray + protected def forwardRun(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { val arrNodes = new Array[Double](noNodes) - var i: Int = 0 var j: Int = 0 var l: Int = 0 - - // forward run - i = 0; - while(i < topology(0)) { + i = 0 + while (i < topology(0)) { arrNodes(i) = arrData(i) i += 1 } l = 1 - while( l <= L ) { + while (l <= L) { j = 0 - while(j < topology(l)) { - var cum: Double = 0.0; + while (j < topology(l)) { + var cum: Double = 0.0 i = 0 - while(i < topology(l - 1)) { + while (i < topology(l - 1)) { cum = cum + arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + i) * arrNodes(ofsNode(l - 1) + i) @@ -331,16 +276,31 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { } l += 1 } + arrNodes + } +} + +private class ANNLeastSquaresGradient(val topology: Array[Int]) extends Gradient with ANNHelper { + + override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { + val arrData = data.toArray + val arrWeights = weights.toArray + + var i: Int = 0 + var j: Int = 0 + var l: Int = 0 + // forward run + val arrNodes = forwardRun(arrData, arrWeights) val arrDiff = new Array[Double](topology(L)) j = 0 - while( j < topology(L)) { - arrDiff(j) = (arrNodes(ofsNode(L) + j) - arrData(topology(0) + j)) + while (j < topology(L)) { + arrDiff(j) = arrNodes(ofsNode(L) + j) - arrData(topology(0) + j) j += 1 } - var err: Double = 0; + var err: Double = 0 j = 0 - while(j < topology(L)) { + while (j < topology(L)) { err = err + arrDiff(j) * arrDiff(j) j += 1 } @@ -348,19 +308,19 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { // back propagation val arrDelta = new Array[Double](noNodes) j = 0 - while(j < topology(L)) { + while (j < topology(L)) { arrDelta(ofsNode(L) + j) = arrDiff(j) * arrNodes(ofsNode(L) + j) * (1 - arrNodes(ofsNode(L) + j)) j += 1 } l = L - 1 - while(l > 0) { + while (l > 0) { j = 0 - while(j < topology(l)) { + while (j < topology(l)) { var cum: Double = 0.0 i = 0 - while( i < topology(l + 1)) { + while (i < topology(l + 1)) { cum = cum + arrWeights(ofsWeight(l + 1) + (topology(l) + 1) * i + j) * arrDelta(ofsNode(l + 1) + i) * @@ -375,11 +335,11 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { // gradient val arrGrad = new Array[Double](noWeights) l = 1 - while(l <= L) { + while (l <= L) { j = 0 - while(j < topology(l)) { + while (j < topology(l)) { i = 0 - while(i < topology(l - 1)) { + while (i < topology(l - 1)) { arrGrad(ofsWeight(l) + (topology(l - 1) + 1) * j + i) = arrNodes(ofsNode(l - 1) + i) * arrDelta(ofsNode(l) + j) @@ -401,7 +361,7 @@ private class ANNLeastSquaresGradient(topology: Array[Int]) extends Gradient { cumGradient: Vector): Double = { val (grad, err) = compute(data, label, weights) cumGradient.toBreeze += grad.toBreeze - return err + err } } From 84ac2e812fb85994559cc69cfcb5c4aacb4b1a78 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Tue, 23 Sep 2014 10:22:58 +0800 Subject: [PATCH 114/143] Update ArtificialNeuralNetwork.scala Cosmetic white space change --- .../org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index d99e78363ae2e..151526de60c4d 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ 
b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -103,7 +103,7 @@ class ArtificialNeuralNetwork private( setNumIterations(maxNumIterations) private def run(input: RDD[(Vector, Vector)], initialWeights: Vector): - ArtificialNeuralNetworkModel = { + ArtificialNeuralNetworkModel = { val data = input.map(v => (0.0, Vectors.fromBreeze(DenseVector.vertcat( From e2e94b2b7e732fd3c66272a92a0169fb5e5e6eab Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Tue, 23 Sep 2014 10:26:36 +0800 Subject: [PATCH 115/143] Update ArtificialNeuralNetwork.scala Another cosmetic white space change --- .../org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index 151526de60c4d..8531851620a61 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -163,7 +163,7 @@ object ArtificialNeuralNetwork { topology: Array[Int], maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel = { - train(input, topology, randomWeights(topology), maxNumIterations, convergenceTol) + train(input, topology, randomWeights(topology), maxNumIterations, convergenceTol) } def randomWeights(topology: Array[Int]): Vector = { From a7fb749031a2a7e40db5c43e127a5c11456a742b Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Tue, 23 Sep 2014 10:37:27 +0800 Subject: [PATCH 116/143] Update ArtificialNeuralNetwork.scala Update comment --- .../org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index 8531851620a61..fe4bc40f26008 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -79,7 +79,7 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol /** * Predict values for a single data point using the model trained. * - * @param testData array representing a single data point + * @param testData Vector representing a single data point * @return Vector prediction from the trained model * * Returns the complete vector. From 3995be80c5aac06f2e02f5b8e749d5bd42a0fd36 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 25 Sep 2014 16:29:40 +0800 Subject: [PATCH 117/143] Update mllib-ann.md --- docs/mllib-ann.md | 126 +++++++++++++++++++++++++++++++--------------- 1 file changed, 85 insertions(+), 41 deletions(-) diff --git a/docs/mllib-ann.md b/docs/mllib-ann.md index ac42446276255..bd91b1439da09 100644 --- a/docs/mllib-ann.md +++ b/docs/mllib-ann.md @@ -15,6 +15,35 @@ The implementation currently consist of the following files: * 'ANNDemo': a demo that approximates three functions and shows a graphical representation of the result +# Summary of usage + +The "ArtificialNeuralNetwork" object is used as an interface to the neural network. 
It is +called as follows: + +``` +val annModel = ArtificialNeuralNetwork.train(rdd, hiddenLayersTopology, maxNumIterations) +``` + +where + +* `rdd` is an RDD of type (Vector,Vector), the first element containing the input vector and +the second the associated output vector. +* `hiddenLayersTopology` is an array of integers (Array[Int]), which contains the number of +nodes per hidden layer, starting with the layer that takes inputs from the input layer, and +finishing with the layer that outputs to the output layer. The bias nodes are not counted. +* `maxNumIterations` is an upper bound on the number of iterations to be performed. +* `annModel` contains the trained ANN parameters, and can be used to calculate the ANN's +approximation to arbitrary input values. + +The approximations can be calculated as follows: + +``` +val v_out = annModel.predict(v_in) +``` + +where v_in is either a Vector or an RDD of Vectors, and v_out respectively a Vector or RDD of +(Vector,Vector) pairs, corresponding to input and output values. + +Further details and other calling options will be elaborated upon below. + # Architecture and Notation The file ArtificialNeuralNetwork.scala implements the ANN. The following picture shows the @@ -72,70 +101,84 @@ The ANN also implements bias units. These are nodes that always output the value units are in all layers except the output layer. They act similar to other nodes, but do not have input. +The "hiddenLayersTopology" array is converted into the "topology" array by adding the number of +input nodes in front, and the number of output nodes at the end. + The value of node N_{j,l} is calculated as follows: `$N_{j,l} = g( \sum_{i=0}^{topology_{l-1}} W_{i,j,l}*N_{i,l-1} )$` -Where g is the sigmod function +Where g is the sigmoid function -`$g(t) = \frac{1}{1+e^{-t}}$` +`$g(t) = \frac{e^{\beta t} }{1+e^{\beta t}}$` # LBFGS -MLlib uses the LBFGS algorithm for training. It minimises the following error function: +MLlib's ANN implementation uses the LBFGS optimisation algorithm for training. It minimises the +following error function: -`$E = \sum_{k=0}^{K-1} (N_{k,L} - Y_k )^2$` +`$E = \sum_{k=0}^{K-1} (N_{k,L} - Y_k)^2$` where Y_k is the target output given inputs N_{0,0} ... N_{I-1,0}. # Implementation Details -## The `ArtificialNeuralNetwork` class +## The "ArtificialNeuralNetwork" class -The `ArtificialNeuralNetwork` class has the following constructor: +The "ArtificialNeuralNetwork" class has the following constructor: -`class ArtificialNeuralNetwork private(topology: Array[Int], maxNumIterations: Int, -convergenceTol: Double)` +``` +class ArtificialNeuralNetwork private(topology: Array[Int], maxNumIterations: Int, +convergenceTol: Double) +``` * `topology` is an array of integers indicating the number of nodes per layer. For example, if -"topology" holds `(3, 5, 1)`, it means that there are three input nodes, five nodes in a single +"topology" holds (3, 5, 1), it means that there are three input nodes, five nodes in a single
All take as the first parameter the RDD `input`, which contains pairs of input and -output vectors. - -* `def train(input: RDD[(Vector, Vector)], topology: Array[Int], maxNumIterations: Int): -ArtificialNeuralNetworkModel`: starts training with random initial weights, and a default -`convergenceTol`=1e-5. -* `def train(input: RDD[(Vector, Vector)], topology: Array[Int], initialWeights: Vector, -maxNumIterations: Int): ArtificialNeuralNetworkModel`: starts training with given initial -weights, and a default `convergenceTol`=1e-5. +stop. A lower value of "convergenceTol" will give a higher precision. + +## The "ArtificialNeuralNetwork" object + +The object "ArtificialNeuralNetwork" is the interface to the "ArtificialNeuralNetwork" class. +The object contains the training function. There are four different instances of the training +function, each for use with different parameters. All take as the first parameter the RDD +"input", which contains pairs of input and output vectors. + +* `def train(input: RDD[(Vector, Vector)], hiddenLayersTopology: Array[Int], maxNumIterations: +Int): ArtificialNeuralNetworkModel`: starts training with random initial weights, and a default +convergenceTol=1e-4. * `def train(input: RDD[(Vector, Vector)], model: ArtificialNeuralNetworkModel, maxNumIterations: Int): ArtificialNeuralNetworkModel`: resumes training given an earlier -calculated model, and a default `convergenceTol`=1e-5. -* `def train(input: RDD[(Vector, Vector)], topology: Array[Int], maxNumIterations: Int, -convergenceTol: Double): ArtificialNeuralNetworkModel`: starts training with random initial -weights. Allows setting a customised `convergenceTol`. -* `def train(input: RDD[(Vector, Vector)], topology: Array[Int], initialWeights: Vector, -maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel`: starts training -with given initial weights. Allows setting a customised `convergenceTol`. +calculated model, and a default convergenceTol=1e-4. +* `def train(input: RDD[(Vector, Vector)], hiddenLayersTopology: Array[Int], maxNumIterations: +Int, convergenceTol: Double): ArtificialNeuralNetworkModel`: starts training with random +initial weights. Allows setting a customised "convergenceTol". * `def train(input: RDD[(Vector, Vector)], model: ArtificialNeuralNetworkModel, maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel`: resumes training -given an earlier calculated model. Allows setting a customised `convergenceTol`. +given an earlier calculated model. Allows setting a customised "convergenceTol". + +Notice that the "hiddenLayersTopology" differs from the "topology" array. The +"hiddenLayersTopology" does not include the number of nodes in the input and output layers. The +number of nodes in input and output layers is calculated from the first element of the training +RDD. For example, the "topology" array (3, 5, 7, 1) would have a "hiddenLayersTopology" (5, 7), +the values 3 and 1 are deduced from the training data. The rationale for having these different +arrays is that future methods may have a different mapping between input values and input nodes +or output values and output nodes. + +## The "ArtificialNeuralNetworkModel" class -All training functions return the trained ANN using the class `ArtificialNeuralNetworkModel`. +All training functions return the trained ANN using the class "ArtificialNeuralNetworkModel". 
This class has the following function: + +* `predict(testData: Vector): Vector` calculates the output vector given input vector +"testData". +* `predict(testData: RDD[Vector]): RDD[(Vector,Vector)]` returns (input, output) vector pairs, +using the input vectors in "testData". + -The weights use dby `predictV` come from the model. +The weights used by "predict" come from the model. ## Training @@ -150,17 +193,17 @@ optimiser, updater and possibly gradient as required. # Demo and tests -Usage of MLlib's ANN is demonstrated through the 'ANNDemo' demo program. The program generates +Usage of MLlib's ANN is demonstrated through the "ANNDemo" demo program. The program generates three functions: * f2d: x -> y * f3d: (x,y) -> z * f4d: t -> (x,y,z) -It will calculate an approximation of the target function, and show a graphical representation +It will calculate approximations of the target functions, and show a graphical representation of the training set and the results after applying the testing set. -In addition, there are the following tests: +In addition, there are the following automated tests: * "ANN learns XOR function": tests that the ANN can properly approximate an XOR function. * "Gradient of ANN": tests that the output of the ANN gradient is roughly equal to an approximated gradient. # Conclusion -The 'AritificalNeuralNetwork' class implements a Artificial Neural Network (ANN), using the -LBFGS algorithm. It takes as input an RDD of input/output values of type 'Vector', and returns -an object of type 'ArtificialNeuralNetworkModel' containing the parameters of the trained ANN. -The 'ArtificialNeuralNetworkModel' object can also be used to calculate results after training. +The "ArtificialNeuralNetwork" class implements an Artificial Neural Network (ANN), using the +LBFGS algorithm. It takes as input an RDD of input/output values of type "(Vector,Vector)", and +returns an object of type "ArtificialNeuralNetworkModel" containing the parameters of the +trained ANN. The "ArtificialNeuralNetworkModel" object can also be used to calculate results +after training.
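The interrupted-and-resumed training mentioned here can be sketched as follows (a hedged illustration against the interface above: it assumes `trainingRDD: RDD[(Vector, Vector)]` and `validationRDD: RDD[Vector]` are already in scope, and the layer sizes, iteration counts and tolerance are illustrative):

```
// One hidden layer of 5 nodes; input and output sizes are deduced from trainingRDD.
val model1 = ArtificialNeuralNetwork.train(trainingRDD, Array(5), 500)

// Inspect intermediate results on a validation set of input vectors.
val intermediate = model1.predict(validationRDD)  // RDD[(Vector, Vector)]

// Resume training from the intermediate model, now with a custom tolerance.
val model2 = ArtificialNeuralNetwork.train(trainingRDD, model1, 500, 1e-6)
```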
From b44aec3ddcf409331f7dccb8b9cf622618406cfc Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 25 Sep 2014 16:31:17 +0800 Subject: [PATCH 118/143] Update ANNDemo.scala Updated to fit "hiddenLayersTopology" --- .../org/apache/spark/examples/ANNDemo.scala | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/examples/src/main/scala/org/apache/spark/examples/ANNDemo.scala b/examples/src/main/scala/org/apache/spark/examples/ANNDemo.scala index 14e9a2d61cb10..dd981f90e9cff 100644 --- a/examples/src/main/scala/org/apache/spark/examples/ANNDemo.scala +++ b/examples/src/main/scala/org/apache/spark/examples/ANNDemo.scala @@ -21,11 +21,13 @@ import java.awt._ import java.awt.event._ import java.text.SimpleDateFormat import java.util.Calendar + import org.apache.spark._ import org.apache.spark.mllib.ann._ import org.apache.spark.mllib.linalg._ import org.apache.spark.mllib.regression._ import org.apache.spark.rdd.RDD + import scala.Array.canBuildFrom import scala.util.Random @@ -367,7 +369,7 @@ object ANNDemo { } - def generateInput3D(f: (Double,Double) => Double, xmin: Double, xmax: Double, + def generateInput3D(f: (Double,Double) => Double, xmin: Double, xmax: Double, ymin: Double, ymax: Double, noPoints: Int): Array[(Vector,Vector)] = { var out = new Array[(Vector,Vector)](noPoints) @@ -484,7 +486,7 @@ object ANNDemo { val validationRDD2D = sc.parallelize(generateInput2D( T => f(T), -10, 10, 100 ), 2).cache - val validationRDD3D = + val validationRDD3D = sc.parallelize(generateInput3D( (x,y) => f3D(x,y), -10, 10, -10, 10, 100 ), 2).cache val validationRDD4D = sc.parallelize( generateInput4D( t => f4D(t), -10, 10, 100 ), 2 ).cache @@ -495,25 +497,25 @@ object ANNDemo { var starttime = Calendar.getInstance().getTime() println("Training 2D") - var model2D = ArtificialNeuralNetwork.train(testRDD2D, Array[Int](1, 5, 3, 1), 1000, 1e-8) + var model2D = ArtificialNeuralNetwork.train(testRDD2D, Array[Int](5, 3), 1000, 1e-8) var stoptime = Calendar.getInstance().getTime() println(((stoptime.getTime-starttime.getTime + 500) / 1000) + "s") starttime = stoptime println("Training 3D") - var model3D = ArtificialNeuralNetwork.train(testRDD3D, Array[Int](2, 20, 1), 1000, 1e-8) + var model3D = ArtificialNeuralNetwork.train(testRDD3D, Array[Int](20), 1000, 1e-8) stoptime = Calendar.getInstance().getTime() println(((stoptime.getTime-starttime.getTime + 500) / 1000) + "s") starttime = stoptime println("Training 4D") - var model4D = ArtificialNeuralNetwork.train(testRDD4D, Array[Int](1, 20, 3), 1000, 1e-8) + var model4D = ArtificialNeuralNetwork.train(testRDD4D, Array[Int](20), 1000, 1e-8) stoptime = Calendar.getInstance().getTime() println(((stoptime.getTime-starttime.getTime + 500) / 1000) + "s") - val predictedAndTarget2D = validationRDD2D.map(T => (T._1, T._2, model2D.predictV(T._1))) - val predictedAndTarget3D = validationRDD3D.map(T => (T._1, T._2, model3D.predictV(T._1))) - val predictedAndTarget4D = validationRDD4D.map(T => (T._1, T._2, model4D.predictV(T._1))) + val predictedAndTarget2D = validationRDD2D.map(T => (T._1, T._2, model2D.predict(T._1))) + val predictedAndTarget3D = validationRDD3D.map(T => (T._1, T._2, model3D.predict(T._1))) + val predictedAndTarget4D = validationRDD4D.map(T => (T._1, T._2, model4D.predict(T._1))) var err2D = predictedAndTarget2D.map( T => (T._3.toArray(0) - T._2.toArray(0))*(T._3.toArray(0) - T._2.toArray(0)) @@ -554,7 +556,7 @@ object ANNDemo { while(true) { // stops when closing the window - curAngle = curAngle + math.Pi/4 + curAngle 
= curAngle + math.Pi/8 if(curAngle >= 2*math.Pi) { curAngle = curAngle - 2*math.Pi } From 6265bd6dcc58dd75e78a018d5c6741f0755fe4c9 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 25 Sep 2014 16:32:33 +0800 Subject: [PATCH 119/143] Update ArtificialNeuralNetwork.scala Updated with hiddenLayersTopology and added scaladoc API info. --- .../mllib/ann/ArtificialNeuralNetwork.scala | 178 +++++++++++++----- 1 file changed, 135 insertions(+), 43 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index fe4bc40f26008..95e7e87e984d3 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -18,6 +18,7 @@ package org.apache.spark.mllib.ann import breeze.linalg.{DenseVector, Vector => BV, axpy => brzAxpy} + import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.mllib.optimization._ import org.apache.spark.rdd.RDD @@ -33,15 +34,20 @@ import org.apache.spark.util.random.XORShiftRandom * * NOTE: output values should be in the range [0,1] * - * For a network of L layers: + * For a network of H hidden layers: + * + * hiddenLayersTopology(h) indicates the number of nodes in hidden layer h, excluding the bias + * node. h counts from 0 (first hidden layer, taking inputs from input layer) to H - 1 (last + * hidden layer, sending outputs to the output layer). * - * topology( l ) indicates the number of nodes in layer l, excluding the bias node. + * hiddenLayersTopology is converted internally to topology, which adds the number of nodes + * in the input and output layers. * * noInput = topology(0), the number of input nodes * noOutput = topology(L-1), the number of output nodes * * input = data( 0 to noInput-1 ) - * output = data( noInput to noInput+noOutput-1 ) + * output = data( noInput to noInput + noOutput - 1 ) * * W_ijl is the weight from node i in layer l-1 to node j in layer l * W_ijl goes to position ofsWeight(l) + j*(topology(l-1)+1) + i in the weights vector @@ -68,29 +74,57 @@ import org.apache.spark.util.random.XORShiftRandom * */ +/** + * Contains the parameters of an Artificial Neural Network (ANN) + * + * @param weights The weights between the neurons in the ANN. + * + * @param topology Array containing the number of nodes per layer in the network, including + * the nodes in the input and output layer, but excluding the bias nodes. + */ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topology: Array[Int]) extends Serializable with ANNHelper { - def computeValues(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { - val arrNodes = forwardRun(arrData, arrWeights) - arrNodes.slice(arrNodes.size - topology(L), arrNodes.size) + /** + * Predicts values for a single data point using the trained model. + * + * @param testData Represents a single data point. + * + * @return Prediction using the trained model. + */ + def predict(testData: Vector): Vector = { + Vectors.dense(computeValues(testData.toArray, weights.toArray)) } /** - * Predict values for a single data point using the model trained. + * Predict values for an RDD of data points using the trained model. * - * @param testData Vector representing a single data point - * @return Vector prediction from the trained model + * @param testDataRDD RDD representing the input vectors. * - * Returns the complete vector. 
+ * @return RDD with predictions using the trained model as (input, output) pairs. */ - def predictV(testData: Vector): Vector = { - Vectors.dense(computeValues(testData.toArray, weights.toArray)) + def predict(testDataRDD: RDD[Vector]): RDD[(Vector,Vector)] = { + testDataRDD.map(T => (T, predict(T)) ) + } + + private def computeValues(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { + val arrNodes = forwardRun(arrData, arrWeights) + arrNodes.slice(arrNodes.size - topology(L), arrNodes.size) } } -class ArtificialNeuralNetwork private( +/** + * Performs the training of an Artificial Neural Network (ANN) + * + * @param topology A vector containing the number of nodes per layer in the network, including + * the nodes in the input and output layer, but excluding the bias nodes. + * + * @param maxNumItereations The maximum number of iterations for the training phase. + * + * @param convergenceTol Convergence tolerance for LBFGS. Smaller value for closer convergence. + */ +class ArtificialNeuralNetwork private[mllib]( topology: Array[Int], maxNumIterations: Int, convergenceTol: Double) @@ -98,13 +132,24 @@ class ArtificialNeuralNetwork private( private val gradient = new ANNLeastSquaresGradient(topology) private val updater = new ANNUpdater() - private var optimizer: Optimizer = new LBFGS(gradient, updater). + private val optimizer = new LBFGS(gradient, updater). setConvergenceTol(convergenceTol). setNumIterations(maxNumIterations) - private def run(input: RDD[(Vector, Vector)], initialWeights: Vector): + /** + * Trains the ANN. + * + * @param trainingRDD RDD containing (input, output) pairs for training. + * + * @param initialWeights: the initial weights of the ANN + * + * @return Trained ANN as ArtificialNeuralNetworkModel. + * + * Uses default convergence tolerance 1e-4 for LBFGS. + */ + private def run(trainingRDD: RDD[(Vector, Vector)], initialWeights: Vector): ArtificialNeuralNetworkModel = { - val data = input.map(v => + val data = trainingRDD.map(v => (0.0, Vectors.fromBreeze(DenseVector.vertcat( v._1.toBreeze.toDenseVector, @@ -115,58 +160,105 @@ class ArtificialNeuralNetwork private( } } +/** + * Interface to the Artificial Neural Network (ANN) + */ object ArtificialNeuralNetwork { + private val defaultTolerance: Double = 1e-4 + + /** + * Trains an ANN. + * + * @param trainingRDD RDD containing (input, output) pairs for training. + * + * @param hiddenLayersTopology Number of nodes per hidden layer, excluding the bias nodes. + * + * @param maxNumIterations Specifies maximum number of training iterations. + * + * @return Trained ANN as ArtificialNeuralNetworkModel. + * + * Uses default convergence tolerance 1e-4 for LBFGS. + */ def train( - input: RDD[(Vector, Vector)], - topology: Array[Int], - initialWeights: Vector, + trainingRDD: RDD[(Vector, Vector)], + hiddenLayersTopology: Array[Int], maxNumIterations: Int): ArtificialNeuralNetworkModel = { - new ArtificialNeuralNetwork(topology, maxNumIterations, 1e-4) - .run(input, initialWeights) + train( trainingRDD, hiddenLayersTopology, maxNumIterations, defaultTolerance) } + /** + * Continues training of an ANN. + * + * @param trainingRDD RDD containing (input, output) pairs for training. + * + * @param model Model of an already partly trained ANN. + * + * @param maxNumIterations Int Maximum number of training iterations. + * + * @return Trained ANN as ArtificialNeuralNetworkModel. + * + * Uses default convergence tolerance 1e-4 for LBFGS. 
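A minimal usage sketch of the two training variants documented above (the RDD name and hidden layer size are assumed for illustration):

```
// Train from random initial weights with the default tolerance,
// then resume training from the partly trained model.
val model = ArtificialNeuralNetwork.train(trainingRDD, Array[Int](5), 100)
val resumed = ArtificialNeuralNetwork.train(trainingRDD, model, 100)
```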
+ */ def train( input: RDD[(Vector,Vector)], model: ArtificialNeuralNetworkModel, maxNumIterations: Int): ArtificialNeuralNetworkModel = { - train(input, model.topology, model.weights, maxNumIterations) - } - - def train( - input: RDD[(Vector, Vector)], - topology: Array[Int], - maxNumIterations: Int): ArtificialNeuralNetworkModel = { - train(input, topology, randomWeights(topology), maxNumIterations) - } - - def train( - input: RDD[(Vector, Vector)], - topology: Array[Int], - initialWeights: Vector, - maxNumIterations: Int, - convergenceTol: Double): ArtificialNeuralNetworkModel = { - new ArtificialNeuralNetwork(topology, maxNumIterations, convergenceTol) - .run(input, initialWeights) + train(input, model, maxNumIterations, defaultTolerance) } + /** + * Trains an ANN using customized convergence tolerance. + * + * @param trainingRDD RDD containing (input, output) pairs for training. + * + * @param hiddenLayersTopology Number of nodes per hidden layer, excluding the bias nodes. + * + * @param maxNumIterations Maximum number of training iterations. + * + * @param convergenceTol Convergence tolerance for LBFGS. Smaller value for closer convergence. + * + * @return Trained ANN as ArtificialNeuralNetworkModel. + */ def train( input: RDD[(Vector,Vector)], model: ArtificialNeuralNetworkModel, maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel = { - train(input, model.topology, model.weights, maxNumIterations, convergenceTol) + new ArtificialNeuralNetwork( model.topology, maxNumIterations, convergenceTol ). + run(input, model.weights) } + /** + * Continues training of an ANN using customized convergence tolerance. + * + * @param trainingRDD RDD containing (input, output) pairs for training. + * + * @param model Model of an already partly trained ANN. + * + * @param maxNumIterations Maximum number of training iterations. + * + * @param convergenceTol Convergence tolerance for LBFGS. Smaller value for closer convergence. + * + * @return Trained ANN as ArtificialNeuralNetworkModel. + */ def train( input: RDD[(Vector, Vector)], - topology: Array[Int], + hiddenLayersTopology: Array[Int], maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel = { - train(input, topology, randomWeights(topology), maxNumIterations, convergenceTol) + val topology = convertTopology(input, hiddenLayersTopology) + new ArtificialNeuralNetwork(topology, maxNumIterations, convergenceTol). 
+ run(input, randomWeights(topology)) + } + + private def convertTopology( input: RDD[(Vector,Vector)], + hiddenLayersTopology: Array[Int] ): Array[Int] = { + val firstElt = input.first + firstElt._1.size +: hiddenLayersTopology :+ firstElt._2.size } - def randomWeights(topology: Array[Int]): Vector = { + private def randomWeights(topology: Array[Int]): Vector = { val rand = new XORShiftRandom() var i: Int = 0 From 325ffabb429a09a502c837bfadf29d53fd18f1c8 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 25 Sep 2014 16:34:10 +0800 Subject: [PATCH 120/143] Update ANNSuite.scala Updated to fit "hiddenLayersTopology" --- .../org/apache/spark/mllib/ann/ANNSuite.scala | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala index 76d0e3e9da7be..86894b5df4e6b 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala @@ -20,6 +20,7 @@ package org.apache.spark.mllib.ann import org.apache.spark.mllib.linalg.Vectors import org.apache.spark.mllib.util.LocalSparkContext import org.apache.spark.util.random.XORShiftRandom + import org.scalatest.FunSuite class ANNSuite extends FunSuite with LocalSparkContext { @@ -32,16 +33,14 @@ class ANNSuite extends FunSuite with LocalSparkContext { Array[Double](1,1) ) val outputs = Array[Double](0, 1, 1, 0) - val inputSize = 2 val hiddenSize = 5 - val outputSize = 1 val data = inputs.zip(outputs).map { case(features, label) => (Vectors.dense(features), Vectors.dense(Array(label)))} val rddData = sc.parallelize(data, 2) - val topology = Array[Int](inputSize, hiddenSize, outputSize) - val model = ArtificialNeuralNetwork.train(rddData, topology, 100, 1e-5) + val hiddenLayersTopology = Array[Int](hiddenSize) + val model = ArtificialNeuralNetwork.train(rddData, hiddenLayersTopology, 100, 1e-5) val predictionAndLabels = rddData.map { case(input, label) => - (model.predictV(input)(0), label(0)) }.collect() + (model.predict(input)(0), label(0)) }.collect() assert(predictionAndLabels.forall { case(p, l) => (math.round(p) - l) == 0 }) } @@ -115,18 +114,20 @@ class ANNSuite extends FunSuite with LocalSparkContext { arrTmpWeights(w) = arrTmpWeights(w) + eps val annModel1 = new ArtificialNeuralNetworkModel(weights, topology) - val brzO1 = annModel1.predictV(data).toBreeze + val brzO1 = annModel1.predict(data).toBreeze val annModel2 = new ArtificialNeuralNetworkModel(tmpWeights, topology) - val brzO2 = annModel2.predictV(data).toBreeze + val brzO2 = annModel2.predict(data).toBreeze val E1 = .5* (brzO1 - brzOut).dot(brzO1 - brzOut) val E2 = .5* (brzO2 - brzOut).dot(brzO2 - brzOut) val dEdW = ( E2 - E1 ) / eps val gradw = gradient(w) - val err = math.abs(dEdW - gradw) - assert(err < accept, s"Difference between calculated and approximated gradient too large (approximated $dEdW, calculated $gradw, difference $err)" ) + val err = dEdW - gradw + assert(math.abs(err) < accept, + s"Difference between calculated and approximated gradient too large ($dEdW - $gradw = $err)" + ) arrTmpWeights(w) = arrTmpWeights(w) - eps From 099ff8550c6fff3dba64c8f53a3f7ea62afca55b Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 25 Sep 2014 16:37:23 +0800 Subject: [PATCH 121/143] Update ArtificialNeuralNetwork.scala --- .../org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index 95e7e87e984d3..373e4a3e38ad0 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -141,7 +141,7 @@ class ArtificialNeuralNetwork private[mllib]( * * @param trainingRDD RDD containing (input, output) pairs for training. * - * @param initialWeights: the initial weights of the ANN + * @param initialWeights The initial weights of the ANN * * @return Trained ANN as ArtificialNeuralNetworkModel. * From 1c0aab4fda5e7d328738d65284a23f090885f9e0 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 25 Sep 2014 16:42:59 +0800 Subject: [PATCH 122/143] Update ArtificialNeuralNetwork.scala --- .../spark/mllib/ann/ArtificialNeuralNetwork.scala | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index 373e4a3e38ad0..77ff9b3bc9b3e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -201,10 +201,10 @@ object ArtificialNeuralNetwork { * Uses default convergence tolerance 1e-4 for LBFGS. */ def train( - input: RDD[(Vector,Vector)], + trainingRDD: RDD[(Vector,Vector)], model: ArtificialNeuralNetworkModel, maxNumIterations: Int): ArtificialNeuralNetworkModel = { - train(input, model, maxNumIterations, defaultTolerance) + train(trainingRDD, model, maxNumIterations, defaultTolerance) } /** @@ -221,12 +221,12 @@ object ArtificialNeuralNetwork { * @return Trained ANN as ArtificialNeuralNetworkModel. */ def train( - input: RDD[(Vector,Vector)], + trainingRDD: RDD[(Vector,Vector)], model: ArtificialNeuralNetworkModel, maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel = { new ArtificialNeuralNetwork( model.topology, maxNumIterations, convergenceTol ). - run(input, model.weights) + run(trainingRDD, model.weights) } /** @@ -243,13 +243,13 @@ object ArtificialNeuralNetwork { * @return Trained ANN as ArtificialNeuralNetworkModel. */ def train( - input: RDD[(Vector, Vector)], + trainingRDD: RDD[(Vector, Vector)], hiddenLayersTopology: Array[Int], maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel = { val topology = convertTopology(input, hiddenLayersTopology) new ArtificialNeuralNetwork(topology, maxNumIterations, convergenceTol). 
- run(input, randomWeights(topology)) + run(trainingRDD, randomWeights(topology)) } private def convertTopology( input: RDD[(Vector,Vector)], From 5db2b60246dad8f75004e3364f4919b5ca3098df Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Thu, 25 Sep 2014 16:50:10 +0800 Subject: [PATCH 123/143] Update ArtificialNeuralNetwork.scala --- .../org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index 77ff9b3bc9b3e..ca1df249175fe 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -247,7 +247,7 @@ object ArtificialNeuralNetwork { hiddenLayersTopology: Array[Int], maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel = { - val topology = convertTopology(input, hiddenLayersTopology) + val topology = convertTopology(trainingRDD, hiddenLayersTopology) new ArtificialNeuralNetwork(topology, maxNumIterations, convergenceTol). run(trainingRDD, randomWeights(topology)) } From b13019a00f1ac38db2630cdfe3016e4ad4549ed0 Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Fri, 26 Sep 2014 15:55:14 +0400 Subject: [PATCH 124/143] Minor style fixes --- .../mllib/ann/ArtificialNeuralNetwork.scala | 95 ++++++------------- .../org/apache/spark/mllib/ann/ANNSuite.scala | 25 +---- 2 files changed, 32 insertions(+), 88 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index ca1df249175fe..2d6110e2b1fc7 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -75,11 +75,10 @@ import org.apache.spark.util.random.XORShiftRandom */ /** - * Contains the parameters of an Artificial Neural Network (ANN) + * Artificial neural network (ANN) model * - * @param weights The weights between the neurons in the ANN. - * - * @param topology Array containing the number of nodes per layer in the network, including + * @param weights the weights between the neurons in the ANN. + * @param topology array containing the number of nodes per layer in the network, including * the nodes in the input and output layer, but excluding the bias nodes. */ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topology: Array[Int]) @@ -88,9 +87,8 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol /** * Predicts values for a single data point using the trained model. * - * @param testData Represents a single data point. - * - * @return Prediction using the trained model. + * @param testData represents a single data point. + * @return prediction using the trained model. */ def predict(testData: Vector): Vector = { Vectors.dense(computeValues(testData.toArray, weights.toArray)) @@ -100,7 +98,6 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol * Predict values for an RDD of data points using the trained model. * * @param testDataRDD RDD representing the input vectors. - * * @return RDD with predictions using the trained model as (input, output) pairs. 
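A sketch of the two `predict` entry points defined above (input names assumed):

```
val prediction: Vector = model.predict(testVector)          // single data point
val pairs: RDD[(Vector, Vector)] = model.predict(testRDD)   // (input, prediction) pairs
```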
*/ def predict(testDataRDD: RDD[Vector]): RDD[(Vector,Vector)] = { @@ -111,7 +108,6 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol val arrNodes = forwardRun(arrData, arrWeights) arrNodes.slice(arrNodes.size - topology(L), arrNodes.size) } - } /** @@ -119,9 +115,7 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol * * @param topology A vector containing the number of nodes per layer in the network, including * the nodes in the input and output layer, but excluding the bias nodes. - * - * @param maxNumItereations The maximum number of iterations for the training phase. - * + * @param maxNumIterations The maximum number of iterations for the training phase. * @param convergenceTol Convergence tolerance for LBFGS. Smaller value for closer convergence. */ class ArtificialNeuralNetwork private[mllib]( @@ -134,18 +128,15 @@ class ArtificialNeuralNetwork private[mllib]( private val updater = new ANNUpdater() private val optimizer = new LBFGS(gradient, updater). setConvergenceTol(convergenceTol). - setNumIterations(maxNumIterations) + setMaxNumIterations(maxNumIterations) /** - * Trains the ANN. + * Trains the ANN model. + * Uses default convergence tolerance 1e-4 for LBFGS. * * @param trainingRDD RDD containing (input, output) pairs for training. - * - * @param initialWeights The initial weights of the ANN - * - * @return Trained ANN as ArtificialNeuralNetworkModel. - * - * Uses default convergence tolerance 1e-4 for LBFGS. + * @param initialWeights the initial weights of the ANN + * @return ANN model. */ private def run(trainingRDD: RDD[(Vector, Vector)], initialWeights: Vector): ArtificialNeuralNetworkModel = { @@ -161,7 +152,7 @@ class ArtificialNeuralNetwork private[mllib]( } /** - * Interface to the Artificial Neural Network (ANN) + * Top level methods for training the artificial neural network (ANN) */ object ArtificialNeuralNetwork { @@ -169,16 +160,12 @@ object ArtificialNeuralNetwork { /** * Trains an ANN. + * Uses default convergence tolerance 1e-4 for LBFGS. * * @param trainingRDD RDD containing (input, output) pairs for training. - * - * @param hiddenLayersTopology Number of nodes per hidden layer, excluding the bias nodes. - * - * @param maxNumIterations Specifies maximum number of training iterations. - * - * @return Trained ANN as ArtificialNeuralNetworkModel. - * - * Uses default convergence tolerance 1e-4 for LBFGS. + * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes. + * @param maxNumIterations specifies maximum number of training iterations. + * @return ANN model. */ def train( trainingRDD: RDD[(Vector, Vector)], @@ -189,16 +176,12 @@ object ArtificialNeuralNetwork { /** * Continues training of an ANN. + * Uses default convergence tolerance 1e-4 for LBFGS. * * @param trainingRDD RDD containing (input, output) pairs for training. - * - * @param model Model of an already partly trained ANN. - * - * @param maxNumIterations Int Maximum number of training iterations. - * - * @return Trained ANN as ArtificialNeuralNetworkModel. - * - * Uses default convergence tolerance 1e-4 for LBFGS. + * @param model model of an already partly trained ANN. + * @param maxNumIterations maximum number of training iterations. + * @return ANN model. */ def train( trainingRDD: RDD[(Vector,Vector)], @@ -211,14 +194,10 @@ object ArtificialNeuralNetwork { * Trains an ANN using customized convergence tolerance. * * @param trainingRDD RDD containing (input, output) pairs for training. 
- * - * @param hiddenLayersTopology Number of nodes per hidden layer, excluding the bias nodes. - * - * @param maxNumIterations Maximum number of training iterations. - * - * @param convergenceTol Convergence tolerance for LBFGS. Smaller value for closer convergence. - * - * @return Trained ANN as ArtificialNeuralNetworkModel. + * @param model model of an already partly trained ANN. + * @param maxNumIterations maximum number of training iterations. + * @param convergenceTol convergence tolerance for LBFGS. Smaller value for closer convergence. + * @return ANN model. */ def train( trainingRDD: RDD[(Vector,Vector)], @@ -233,14 +212,10 @@ object ArtificialNeuralNetwork { * Continues training of an ANN using customized convergence tolerance. * * @param trainingRDD RDD containing (input, output) pairs for training. - * - * @param model Model of an already partly trained ANN. - * - * @param maxNumIterations Maximum number of training iterations. - * - * @param convergenceTol Convergence tolerance for LBFGS. Smaller value for closer convergence. - * - * @return Trained ANN as ArtificialNeuralNetworkModel. + * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes. + * @param maxNumIterations maximum number of training iterations. + * @param convergenceTol convergence tolerance for LBFGS. Smaller value for closer convergence. + * @return ANN model. */ def train( trainingRDD: RDD[(Vector, Vector)], @@ -260,10 +235,8 @@ object ArtificialNeuralNetwork { private def randomWeights(topology: Array[Int]): Vector = { val rand = new XORShiftRandom() - var i: Int = 0 var l: Int = 0 - val noWeights = { var tmp = 0 var i = 1 @@ -273,10 +246,8 @@ object ArtificialNeuralNetwork { } tmp } - val initialWeightsArr = new Array[Double](noWeights) var pos = 0 - l = 1 while (l < topology.length) { i = 0 @@ -291,11 +262,13 @@ object ArtificialNeuralNetwork { } } +/** + * Helper methods for ANN + */ private[ann] trait ANNHelper { protected val topology: Array[Int] protected def g(x: Double) = 1.0 / (1.0 + math.exp(-x)) protected val L = topology.length - 1 - protected val noWeights = { var tmp = 0 var l = 1 @@ -305,7 +278,6 @@ private[ann] trait ANNHelper { } tmp } - protected val ofsWeight: Array[Int] = { val tmp = new Array[Int](L + 1) var curPos = 0 @@ -318,7 +290,6 @@ private[ann] trait ANNHelper { } tmp } - protected val noNodes: Int = { var tmp: Integer = 0 var l = 0 @@ -328,7 +299,6 @@ private[ann] trait ANNHelper { } tmp } - protected val ofsNode: Array[Int] = { val tmp = new Array[Int](L + 1) tmp(0) = 0 @@ -377,7 +347,6 @@ private class ANNLeastSquaresGradient(val topology: Array[Int]) extends Gradient override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = { val arrData = data.toArray val arrWeights = weights.toArray - var i: Int = 0 var j: Int = 0 var l: Int = 0 @@ -389,7 +358,6 @@ private class ANNLeastSquaresGradient(val topology: Array[Int]) extends Gradient arrDiff(j) = arrNodes(ofsNode(L) + j) - arrData(topology(0) + j) j += 1 } - var err: Double = 0 j = 0 while (j < topology(L)) { @@ -470,5 +438,4 @@ private class ANNUpdater extends Updater { brzAxpy(-thisIterStepSize, gradient.toBreeze, brzWeights) (Vectors.fromBreeze(brzWeights), 0) } - } diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala index 86894b5df4e6b..35743a2fc6119 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala +++ 
b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala @@ -38,105 +38,82 @@ class ANNSuite extends FunSuite with LocalSparkContext { (Vectors.dense(features), Vectors.dense(Array(label)))} val rddData = sc.parallelize(data, 2) val hiddenLayersTopology = Array[Int](hiddenSize) - val model = ArtificialNeuralNetwork.train(rddData, hiddenLayersTopology, 100, 1e-5) + val model = ArtificialNeuralNetwork.train(rddData, hiddenLayersTopology, 500, 1e-5) val predictionAndLabels = rddData.map { case(input, label) => (model.predict(input)(0), label(0)) }.collect() assert(predictionAndLabels.forall { case(p, l) => (math.round(p) - l) == 0 }) } test("Gradient of ANN") { - val eps = 1e-6 val accept = 1e-7 - val topologyArr = Array[Array[Int]]( Array[Int](1, 5, 1), Array[Int](5, 10, 5, 3), Array[Int](128, 256, 128) ) - val rnd = new XORShiftRandom(0) - var cnt = 0 while( cnt Date: Fri, 26 Sep 2014 17:32:13 +0400 Subject: [PATCH 125/143] Unit test parameter --- mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala index 35743a2fc6119..4525391775eb3 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala @@ -38,7 +38,7 @@ class ANNSuite extends FunSuite with LocalSparkContext { (Vectors.dense(features), Vectors.dense(Array(label)))} val rddData = sc.parallelize(data, 2) val hiddenLayersTopology = Array[Int](hiddenSize) - val model = ArtificialNeuralNetwork.train(rddData, hiddenLayersTopology, 500, 1e-5) + val model = ArtificialNeuralNetwork.train(rddData, hiddenLayersTopology, 2000, 1e-5) val predictionAndLabels = rddData.map { case(input, label) => (model.predict(input)(0), label(0)) }.collect() assert(predictionAndLabels.forall { case(p, l) => (math.round(p) - l) == 0 }) From fefe08ebf30b891fef41c1cf409a52104edd1e1c Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Sun, 28 Sep 2014 14:16:01 +0800 Subject: [PATCH 126/143] Update ANNSuite.scala Make sure there are positive as well as negative weights in the gradient test. 
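The hunk below widens the random weight range used in the gradient test; the arithmetic, restated:

```
// rnd.nextDouble() is uniform on [0, 1); scaling by 4.8 and shifting by -2.4
// gives weights uniform on [-2.4, 2.4), so both signs occur in the test.
arrWeights(w) = rnd.nextDouble() * 4.8 - 2.4
```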
--- mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala index 4525391775eb3..981c6ce87c33e 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala @@ -69,7 +69,7 @@ class ANNSuite extends FunSuite with LocalSparkContext { val arrWeights = new Array[Double](noWeights) var w = 0 while(w < noWeights) { - arrWeights(w) = rnd.nextDouble() + arrWeights(w) = rnd.nextDouble() * 4.8 - 2.4 w += 1 } val arrInp = new Array[Double](noInp) From 2fbbe2391ccd441a19344b3c3540cf5817187ebb Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Tue, 28 Oct 2014 19:27:15 +0800 Subject: [PATCH 127/143] Update ArtificialNeuralNetwork.scala Add support for customised initial weights --- .../mllib/ann/ArtificialNeuralNetwork.scala | 85 +++++++++++++++++-- 1 file changed, 77 insertions(+), 8 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index 2d6110e2b1fc7..3ea5ca51eb968 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -36,7 +36,7 @@ import org.apache.spark.util.random.XORShiftRandom * * For a network of H hidden layers: * - * hiddenLayersTopology(h) indicates the number of nodes in hidden layer h, excluding the bias + * hiddenLayersTopology(h) indicates the number of nodes in hidden layer h, excluding the bias * node. h counts from 0 (first hidden layer, taking inputs from input layer) to H - 1 (last * hidden layer, sending outputs to the output layer). * @@ -76,7 +76,7 @@ import org.apache.spark.util.random.XORShiftRandom /** * Artificial neural network (ANN) model - * + * * @param weights the weights between the neurons in the ANN. * @param topology array containing the number of nodes per layer in the network, including * the nodes in the input and output layer, but excluding the bias nodes. @@ -171,7 +171,7 @@ object ArtificialNeuralNetwork { trainingRDD: RDD[(Vector, Vector)], hiddenLayersTopology: Array[Int], maxNumIterations: Int): ArtificialNeuralNetworkModel = { - train( trainingRDD, hiddenLayersTopology, maxNumIterations, defaultTolerance) + train(trainingRDD, hiddenLayersTopology, maxNumIterations, defaultTolerance) } /** @@ -190,6 +190,23 @@ object ArtificialNeuralNetwork { train(trainingRDD, model, maxNumIterations, defaultTolerance) } + /** + * Trains an ANN with given initial weights. + * Uses default convergence tolerance 1e-4 for LBFGS. + * + * @param trainingRDD RDD containing (input, output) pairs for training. + * @param initialWeights initial weights vector. + * @param maxNumIterations maximum number of training iterations. + * @return ANN model. + */ + def train( + trainingRDD: RDD[(Vector,Vector)], + hiddenLayersTopology: Array[Int], + initialWeights: Vector, + maxNumIterations: Int): ArtificialNeuralNetworkModel = { + train(trainingRDD, hiddenLayersTopology, initialWeights, maxNumIterations, defaultTolerance) + } + /** * Trains an ANN using customized convergence tolerance. 
* @@ -204,7 +221,7 @@ object ArtificialNeuralNetwork { model: ArtificialNeuralNetworkModel, maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel = { - new ArtificialNeuralNetwork( model.topology, maxNumIterations, convergenceTol ). + new ArtificialNeuralNetwork(model.topology, maxNumIterations, convergenceTol). run(trainingRDD, model.weights) } @@ -224,17 +241,69 @@ object ArtificialNeuralNetwork { convergenceTol: Double): ArtificialNeuralNetworkModel = { val topology = convertTopology(trainingRDD, hiddenLayersTopology) new ArtificialNeuralNetwork(topology, maxNumIterations, convergenceTol). - run(trainingRDD, randomWeights(topology)) + run(trainingRDD, randomWeights(topology, false)) + } + + /** + * Trains an ANN with given initial weights. + * + * @param trainingRDD RDD containing (input, output) pairs for training. + * @param initialWeights initial weights vector. + * @param maxNumIterations maximum number of training iterations. + * @param convergenceTol convergence tolerance for LBFGS. Smaller value for closer convergence. + * @return ANN model. + */ + def train( + trainingRDD: RDD[(Vector,Vector)], + hiddenLayersTopology: Array[Int], + initialWeights: Vector, + maxNumIterations: Int, + convergenceTol: Double): ArtificialNeuralNetworkModel = { + val topology = convertTopology(trainingRDD, hiddenLayersTopology) + new ArtificialNeuralNetwork(topology, maxNumIterations, convergenceTol). + run(trainingRDD, initialWeights) + } + + /** + * Provides a random weights vector. + * + * @param trainingRDD RDD containing (input, output) pairs for training. + * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes. + * @return random weights vector. + */ + def getRandomWeights( + trainingRDD: RDD[(Vector,Vector)], + hiddenLayersTopology: Array[Int]): Vector = { + val topology = convertTopology(trainingRDD, hiddenLayersTopology) + return randomWeights(topology, false) + } + + /** + * Provides a random weights vector, using given random seed. + * + * @param trainingRDD RDD containing (input, output) pairs for later training. + * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes. + * @param seed random generator seed. + * @return random weights vector. + */ + def getRandomWeights( + trainingRDD: RDD[(Vector,Vector)], + hiddenLayersTopology: Array[Int], + seed: Int): Vector = { + val topology = convertTopology(trainingRDD, hiddenLayersTopology) + return randomWeights(topology, true, seed) } - private def convertTopology( input: RDD[(Vector,Vector)], + private def convertTopology( + input: RDD[(Vector,Vector)], hiddenLayersTopology: Array[Int] ): Array[Int] = { val firstElt = input.first firstElt._1.size +: hiddenLayersTopology :+ firstElt._2.size } - private def randomWeights(topology: Array[Int]): Vector = { - val rand = new XORShiftRandom() + private def randomWeights(topology: Array[Int], useSeed: Boolean, seed: Int = 0): Vector = { + val rand: XORShiftRandom = + if( useSeed == false ) new XORShiftRandom() else new XORShiftRandom(seed) var i: Int = 0 var l: Int = 0 val noWeights = { From 57565aedf3482ff92a51b83d760f8c56c4472f75 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Tue, 28 Oct 2014 19:28:33 +0800 Subject: [PATCH 128/143] Update ANNSuite.scala Uses initial weights generated from fixed random seed for XOR test. 
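The deterministic set-up adopted by the test change below, as a sketch (`getRandomWeights` is the name at this point in the series; a later patch renames it to `randomWeights`):

```
val initialWeights =
  ArtificialNeuralNetwork.getRandomWeights(rddData, hiddenLayersTopology, 0x01234567)
val model =
  ArtificialNeuralNetwork.train(rddData, hiddenLayersTopology, initialWeights, 200)
```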
--- .../test/scala/org/apache/spark/mllib/ann/ANNSuite.scala | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala index 981c6ce87c33e..69445ef6471c5 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala @@ -33,12 +33,12 @@ class ANNSuite extends FunSuite with LocalSparkContext { Array[Double](1,1) ) val outputs = Array[Double](0, 1, 1, 0) - val hiddenSize = 5 val data = inputs.zip(outputs).map { case(features, label) => (Vectors.dense(features), Vectors.dense(Array(label)))} val rddData = sc.parallelize(data, 2) - val hiddenLayersTopology = Array[Int](hiddenSize) - val model = ArtificialNeuralNetwork.train(rddData, hiddenLayersTopology, 2000, 1e-5) + val hiddenLayersTopology = Array[Int](5) + val initialWeights = ArtificialNeuralNetwork.getRandomWeights(rddData, hiddenLayersTopology, 0x01234567) + val model = ArtificialNeuralNetwork.train(rddData, hiddenLayersTopology, initialWeights, 200) val predictionAndLabels = rddData.map { case(input, label) => (model.predict(input)(0), label(0)) }.collect() assert(predictionAndLabels.forall { case(p, l) => (math.round(p) - l) == 0 }) @@ -107,7 +107,7 @@ class ANNSuite extends FunSuite with LocalSparkContext { val dEdW = ( E2 - E1 ) / eps val gradw = gradient(w) val err = dEdW - gradw - assert(math.abs(err) < accept, + assert(math.abs(err) < accept, s"Difference between calculated and approximated gradient too large ($dEdW - $gradw = $err)" ) arrTmpWeights(w) = arrTmpWeights(w) - eps From bd748344fb157fdbf243f8bb6e62e9075d515751 Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Tue, 28 Oct 2014 15:34:30 -0700 Subject: [PATCH 129/143] Minor stylefix, add additional function for customized initial weights --- .../mllib/ann/ArtificialNeuralNetwork.scala | 22 +++++++++++++++++-- .../org/apache/spark/mllib/ann/ANNSuite.scala | 2 +- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index 3ea5ca51eb968..392f6e458523f 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -271,7 +271,7 @@ object ArtificialNeuralNetwork { * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes. * @return random weights vector. */ - def getRandomWeights( + def randomWeights( trainingRDD: RDD[(Vector,Vector)], hiddenLayersTopology: Array[Int]): Vector = { val topology = convertTopology(trainingRDD, hiddenLayersTopology) @@ -286,7 +286,7 @@ object ArtificialNeuralNetwork { * @param seed random generator seed. * @return random weights vector. */ - def getRandomWeights( + def randomWeights( trainingRDD: RDD[(Vector,Vector)], hiddenLayersTopology: Array[Int], seed: Int): Vector = { @@ -294,6 +294,24 @@ object ArtificialNeuralNetwork { return randomWeights(topology, true, seed) } + /** + * Provides a random weights vector, using given random seed. + * + * @param inputLayerSize size of input layer. + * @param outputLayerSize size of output layer. + * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes. + * @param seed random generator seed. + * @return random weights vector. 
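A usage sketch for the size-based overload documented above, for when no training RDD is available yet (sizes and seed are illustrative):

```
// 2 inputs, 1 output, one hidden layer of 5 nodes, fixed seed for reproducibility.
val weights = ArtificialNeuralNetwork.randomWeights(2, 1, Array[Int](5), 0x01234567)
```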
+ */ + def randomWeights( + inputLayerSize: Int, + outputLayerSize: Int, + hiddenLayersTopology: Array[Int], + seed: Int): Vector = { + val topology = inputLayerSize +: hiddenLayersTopology :+ outputLayerSize + return randomWeights(topology, true, seed) + } + private def convertTopology( input: RDD[(Vector,Vector)], hiddenLayersTopology: Array[Int] ): Array[Int] = { diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala index 69445ef6471c5..d95846d97c3b7 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala @@ -37,7 +37,7 @@ class ANNSuite extends FunSuite with LocalSparkContext { (Vectors.dense(features), Vectors.dense(Array(label)))} val rddData = sc.parallelize(data, 2) val hiddenLayersTopology = Array[Int](5) - val initialWeights = ArtificialNeuralNetwork.getRandomWeights(rddData, hiddenLayersTopology, 0x01234567) + val initialWeights = ArtificialNeuralNetwork.randomWeights(rddData, hiddenLayersTopology, 0x01234567) val model = ArtificialNeuralNetwork.train(rddData, hiddenLayersTopology, initialWeights, 200) val predictionAndLabels = rddData.map { case(input, label) => (model.predict(input)(0), label(0)) }.collect() From 12fb903c7f1805f9d73b3eb5f867119dc0d1ba6e Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Mon, 3 Nov 2014 21:51:14 +0800 Subject: [PATCH 130/143] Update mllib-ann.md Updated to the latest API --- docs/mllib-ann.md | 32 ++++++++++++++++++++++++-------- 1 file changed, 24 insertions(+), 8 deletions(-) diff --git a/docs/mllib-ann.md b/docs/mllib-ann.md index bd91b1439da09..effb162933e2c 100644 --- a/docs/mllib-ann.md +++ b/docs/mllib-ann.md @@ -37,7 +37,9 @@ approximation to arbitrary input values. The approximations can be calculated as follows: +``` val v_out = annModel.predict(v_in) +``` where v_in is either a Vector or an RDD of Vectors, and v_out respectively a Vector or RDD of (Vector,Vector) pairs, corresponding to input and output values. @@ -101,9 +103,6 @@ The ANN also implements bias units. These are nodes that always output the value units are in all layers except the output layer. They act similar to other nodes, but do not have input. -The "hiddenLayersTopology" array is converted into the "topology" array by adding the number of -input nodes in front, and the number of output nodes at the end. - The value of node N_{j,l} is calculated as follows: `$N_{j,l} = g( \sum_{i=0}^{topology_l} W_{i,j,l)*N_{i,l-1} )$` @@ -143,22 +142,39 @@ stop. A lower value of "convergenceTol" will give a higher precision. ## The "ArtificialNeuralNetwork" object The object "ArtificialNeuralNetwork" is the interface to the "ArtificialNeuralNetwork" class. -The object contains the training function. There are four different instances of the training +The object contains the training function. There are six different instances of the training function, each for use with different parameters. All take as the first parameter the RDD "input", which contains pairs of input and output vectors. -* `def train(input: RDD[(Vector, Vector)], hiddenLayersTopology: Array[Int], maxNumIterations: +In addition, there are three functions for generating random weights. Two take a fixed seed, +which is useful for testing if one wants to start with the same weights in every test. 
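For example, combining one of the training variants listed below with batch prediction (layer sizes as in the ANNDemo example):

```
val model = ArtificialNeuralNetwork.train(trainingRDD, Array[Int](5, 3), 1000, 1e-8)
val predictions = model.predict(testRDD)
```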
+ +* `def train(trainingRDD: RDD[(Vector, Vector)], hiddenLayersTopology: Array[Int], maxNumIterations: Int): ArtificialNeuralNetworkModel`: starts training with random initial weights, and a default convergenceTol=1e-4. -* `def train(input: RDD[(Vector, Vector)], model: ArtificialNeuralNetworkModel, +* `def train(trainingRDD: RDD[(Vector, Vector)], model: ArtificialNeuralNetworkModel, maxNumIterations: Int): ArtificialNeuralNetworkModel`: resumes training given an earlier calculated model, and a default convergenceTol=1e-4. -* `def train(input: RDD[(Vector, Vector)], hiddenLayersTopology: Array[Int], maxNumIterations: +* `def train(trainingRDD: RDD[(Vector,Vector)], hiddenLayersTopology: Array[Int], +initialWeights: Vector, maxNumIterations: Int): ArtificialNeuralNetworkModel`: Trains an ANN +with given initial weights, and a default convergenceTol=1e-4. +* `def train(trainingRDD: RDD[(Vector, Vector)], hiddenLayersTopology: Array[Int], maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel`: starts training with random initial weights. Allows setting a customised "convergenceTol". -* `def train(input: RDD[(Vector, Vector)], model: ArtificialNeuralNetworkModel, +* `def train(trainingRDD: RDD[(Vector, Vector)], model: ArtificialNeuralNetworkModel, maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel`: resumes training given an earlier calculated model. Allows setting a customised "convergenceTol". +* `def train(trainingRDD: RDD[(Vector,Vector)], hiddenLayersTopology: Array[Int], +initialWeights: Vector, maxNumIterations: Int, convergenceTol: Double): +ArtificialNeuralNetworkModel`: Trains an ANN with given initial weights. Allows setting a +customised "convergenceTol". +* `def randomWeights(trainingRDD: RDD[(Vector,Vector)], hiddenLayersTopology: Array[Int]): +Vector`: Generates a random weights vector. +*`def randomWeights(trainingRDD: RDD[(Vector,Vector)], hiddenLayersTopology: Array[Int], +seed: Int): Vector`: Generates a random weights vector with given seed. +*`def randomWeights(inputLayerSize: Int, outputLayerSize: Int, hiddenLayersTopology: Array[Int], +seed: Int): Vector`: Generates a random weights vector, using given random seed, input layer +size, hidden layers topology and output layer size. Notice that the "hiddenLayersTopology" differs from the "topology" array. The "hiddenLayersTopology" does not include the number of nodes in the input and output layers. The From a0d1da0818ee3cff39e53ef72a7e5a4b3bec4135 Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Mon, 3 Nov 2014 21:52:38 +0800 Subject: [PATCH 131/143] Update ArtificialNeuralNetwork.scala Fix ScalaStyle --- .../spark/mllib/ann/ArtificialNeuralNetwork.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index 392f6e458523f..8524bfabefaf8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -130,7 +130,7 @@ class ArtificialNeuralNetwork private[mllib]( setConvergenceTol(convergenceTol). setMaxNumIterations(maxNumIterations) - /** + /** * Trains the ANN model. * Uses default convergence tolerance 1e-4 for LBFGS. * @@ -304,10 +304,10 @@ object ArtificialNeuralNetwork { * @return random weights vector. 
*/ def randomWeights( - inputLayerSize: Int, - outputLayerSize: Int, - hiddenLayersTopology: Array[Int], - seed: Int): Vector = { + inputLayerSize: Int, + outputLayerSize: Int, + hiddenLayersTopology: Array[Int], + seed: Int): Vector = { val topology = inputLayerSize +: hiddenLayersTopology :+ outputLayerSize return randomWeights(topology, true, seed) } From 9b106660ac407a1f396e31789ca05b4d236d945e Mon Sep 17 00:00:00 2001 From: Bert Greevenbosch Date: Wed, 10 Dec 2014 21:52:47 +0800 Subject: [PATCH 132/143] Update mllib-ann.md Statistical -> Stochastic --- docs/mllib-ann.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/mllib-ann.md b/docs/mllib-ann.md index effb162933e2c..dfbe173ffbacb 100644 --- a/docs/mllib-ann.md +++ b/docs/mllib-ann.md @@ -149,18 +149,18 @@ function, each for use with different parameters. All take as the first paramete In addition, there are three functions for generating random weights. Two take a fixed seed, which is useful for testing if one wants to start with the same weights in every test. -* `def train(trainingRDD: RDD[(Vector, Vector)], hiddenLayersTopology: Array[Int], maxNumIterations: -Int): ArtificialNeuralNetworkModel`: starts training with random initial weights, and a default -convergenceTol=1e-4. +* `def train(trainingRDD: RDD[(Vector, Vector)], hiddenLayersTopology: Array[Int], +maxNumIterations: Int): ArtificialNeuralNetworkModel`: starts training with random initial +weights, and a default convergenceTol=1e-4. * `def train(trainingRDD: RDD[(Vector, Vector)], model: ArtificialNeuralNetworkModel, maxNumIterations: Int): ArtificialNeuralNetworkModel`: resumes training given an earlier calculated model, and a default convergenceTol=1e-4. * `def train(trainingRDD: RDD[(Vector,Vector)], hiddenLayersTopology: Array[Int], initialWeights: Vector, maxNumIterations: Int): ArtificialNeuralNetworkModel`: Trains an ANN with given initial weights, and a default convergenceTol=1e-4. -* `def train(trainingRDD: RDD[(Vector, Vector)], hiddenLayersTopology: Array[Int], maxNumIterations: -Int, convergenceTol: Double): ArtificialNeuralNetworkModel`: starts training with random -initial weights. Allows setting a customised "convergenceTol". +* `def train(trainingRDD: RDD[(Vector, Vector)], hiddenLayersTopology: Array[Int], +maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel`: starts training +with random initial weights. Allows setting a customised "convergenceTol". * `def train(trainingRDD: RDD[(Vector, Vector)], model: ArtificialNeuralNetworkModel, maxNumIterations: Int, convergenceTol: Double): ArtificialNeuralNetworkModel`: resumes training given an earlier calculated model. Allows setting a customised "convergenceTol". @@ -199,7 +199,7 @@ The weights used by "predict" come from the model. ## Training We have chosen to implement the ANN with LBFGS as optimiser function. We compared it with -Statistical Gradient Descent. LBFGS was much faster, but correspondingly also starts overfitting +Stochastic Gradient Descent. LBFGS was much faster, but correspondingly also starts overfitting earlier. Science has provided many different strategies to train an ANN. Hence it is important that the
Hence it is important that the From 3cf5f9ba00c287f31350dd8fb0a6679f26587773 Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Tue, 16 Dec 2014 15:40:46 -0800 Subject: [PATCH 133/143] Fixes after rebase --- .../org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala | 2 +- .../src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index 8524bfabefaf8..5e41cba9b636e 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -128,7 +128,7 @@ class ArtificialNeuralNetwork private[mllib]( private val updater = new ANNUpdater() private val optimizer = new LBFGS(gradient, updater). setConvergenceTol(convergenceTol). - setMaxNumIterations(maxNumIterations) + setNumIterations(maxNumIterations) /** * Trains the ANN model. diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala index d95846d97c3b7..75bd3d11c36d9 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala @@ -18,12 +18,12 @@ package org.apache.spark.mllib.ann import org.apache.spark.mllib.linalg.Vectors -import org.apache.spark.mllib.util.LocalSparkContext +import org.apache.spark.mllib.util.MLlibTestSparkContext import org.apache.spark.util.random.XORShiftRandom import org.scalatest.FunSuite -class ANNSuite extends FunSuite with LocalSparkContext { +class ANNSuite extends FunSuite with MLlibTestSparkContext { test("ANN learns XOR function") { val inputs = Array[Array[Double]]( From 398e3dd0b7b658e7933338786f604fff9be3e867 Mon Sep 17 00:00:00 2001 From: Alexander Ulanov Date: Fri, 19 Dec 2014 12:25:51 -0800 Subject: [PATCH 134/143] Matrix form of back-propagation based on avulanov/spark/tree/neuralnetwork --- .../mllib/ann/ArtificialNeuralNetwork.scala | 331 ++++++++---------- 1 file changed, 146 insertions(+), 185 deletions(-) diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala index 5e41cba9b636e..d32f79a237606 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala @@ -17,7 +17,11 @@ package org.apache.spark.mllib.ann -import breeze.linalg.{DenseVector, Vector => BV, axpy => brzAxpy} +import breeze.linalg.{axpy => brzAxpy, Vector => BV, DenseVector => BDV, +DenseMatrix => BDM, sum => Bsum, argmax => Bargmax, norm => Bnorm} +import breeze.numerics.{sigmoid => Bsigmoid} +import org.apache.spark.annotation.Experimental +import org.apache.spark.mllib.linalg import org.apache.spark.mllib.linalg.{Vector, Vectors} import org.apache.spark.mllib.optimization._ @@ -82,7 +86,9 @@ import org.apache.spark.util.random.XORShiftRandom * the nodes in the input and output layer, but excluding the bias nodes. 
*/ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topology: Array[Int]) - extends Serializable with ANNHelper { + extends Serializable with NeuralHelper { + + val (weightMatrices, bias) = unrollWeights(weights) /** * Predicts values for a single data point using the trained model. @@ -91,7 +97,7 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol * @return prediction using the trained model. */ def predict(testData: Vector): Vector = { - Vectors.dense(computeValues(testData.toArray, weights.toArray)) + Vectors.dense(computeValues(testData)) } /** @@ -104,9 +110,10 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol testDataRDD.map(T => (T, predict(T)) ) } - private def computeValues(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = { - val arrNodes = forwardRun(arrData, arrWeights) - arrNodes.slice(arrNodes.size - topology(L), arrNodes.size) + private def computeValues(testData: Vector): Array[Double] = { + /* TODO: BDM */ + val outputs = forwardRun(testData.toBreeze.toDenseVector.toDenseMatrix.t, weightMatrices, bias) + outputs(topology.length - 1).toArray } } @@ -119,9 +126,9 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol * @param convergenceTol Convergence tolerance for LBFGS. Smaller value for closer convergence. */ class ArtificialNeuralNetwork private[mllib]( - topology: Array[Int], - maxNumIterations: Int, - convergenceTol: Double) + topology: Array[Int], + maxNumIterations: Int, + convergenceTol: Double) extends Serializable { private val gradient = new ANNLeastSquaresGradient(topology) @@ -139,10 +146,10 @@ class ArtificialNeuralNetwork private[mllib]( * @return ANN model. */ private def run(trainingRDD: RDD[(Vector, Vector)], initialWeights: Vector): - ArtificialNeuralNetworkModel = { + ArtificialNeuralNetworkModel = { val data = trainingRDD.map(v => (0.0, - Vectors.fromBreeze(DenseVector.vertcat( + Vectors.fromBreeze(BDV.vertcat( v._1.toBreeze.toDenseVector, v._2.toBreeze.toDenseVector)) )) @@ -168,9 +175,9 @@ object ArtificialNeuralNetwork { * @return ANN model. */ def train( - trainingRDD: RDD[(Vector, Vector)], - hiddenLayersTopology: Array[Int], - maxNumIterations: Int): ArtificialNeuralNetworkModel = { + trainingRDD: RDD[(Vector, Vector)], + hiddenLayersTopology: Array[Int], + maxNumIterations: Int): ArtificialNeuralNetworkModel = { train(trainingRDD, hiddenLayersTopology, maxNumIterations, defaultTolerance) } @@ -184,9 +191,9 @@ object ArtificialNeuralNetwork { * @return ANN model. */ def train( - trainingRDD: RDD[(Vector,Vector)], - model: ArtificialNeuralNetworkModel, - maxNumIterations: Int): ArtificialNeuralNetworkModel = { + trainingRDD: RDD[(Vector,Vector)], + model: ArtificialNeuralNetworkModel, + maxNumIterations: Int): ArtificialNeuralNetworkModel = { train(trainingRDD, model, maxNumIterations, defaultTolerance) } @@ -200,10 +207,10 @@ object ArtificialNeuralNetwork { * @return ANN model. */ def train( - trainingRDD: RDD[(Vector,Vector)], - hiddenLayersTopology: Array[Int], - initialWeights: Vector, - maxNumIterations: Int): ArtificialNeuralNetworkModel = { + trainingRDD: RDD[(Vector,Vector)], + hiddenLayersTopology: Array[Int], + initialWeights: Vector, + maxNumIterations: Int): ArtificialNeuralNetworkModel = { train(trainingRDD, hiddenLayersTopology, initialWeights, maxNumIterations, defaultTolerance) } @@ -217,10 +224,10 @@ object ArtificialNeuralNetwork { * @return ANN model. 
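For reference, the matrix form this patch introduces: with $o_0$ the input column, $t$ the target, $g$ the sigmoid from the header comment, $\odot$ the element-wise product, and $L$ the index of the output layer, `forwardRun` and `wGradient` below compute, per layer $l$:

`$o_l = g( W_l o_{l-1} + b_l )$`

`$\delta_L = (o_L - t) \odot o_L \odot (1 - o_L)$`

`$\delta_l = (W_{l+1}^T \delta_{l+1}) \odot o_l \odot (1 - o_l)$`

`$\partial E / \partial W_l = \delta_l o_{l-1}^T$`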
*/ def train( - trainingRDD: RDD[(Vector,Vector)], - model: ArtificialNeuralNetworkModel, - maxNumIterations: Int, - convergenceTol: Double): ArtificialNeuralNetworkModel = { + trainingRDD: RDD[(Vector,Vector)], + model: ArtificialNeuralNetworkModel, + maxNumIterations: Int, + convergenceTol: Double): ArtificialNeuralNetworkModel = { new ArtificialNeuralNetwork(model.topology, maxNumIterations, convergenceTol). run(trainingRDD, model.weights) } @@ -235,10 +242,10 @@ object ArtificialNeuralNetwork { * @return ANN model. */ def train( - trainingRDD: RDD[(Vector, Vector)], - hiddenLayersTopology: Array[Int], - maxNumIterations: Int, - convergenceTol: Double): ArtificialNeuralNetworkModel = { + trainingRDD: RDD[(Vector, Vector)], + hiddenLayersTopology: Array[Int], + maxNumIterations: Int, + convergenceTol: Double): ArtificialNeuralNetworkModel = { val topology = convertTopology(trainingRDD, hiddenLayersTopology) new ArtificialNeuralNetwork(topology, maxNumIterations, convergenceTol). run(trainingRDD, randomWeights(topology, false)) @@ -254,11 +261,11 @@ object ArtificialNeuralNetwork { * @return ANN model. */ def train( - trainingRDD: RDD[(Vector,Vector)], - hiddenLayersTopology: Array[Int], - initialWeights: Vector, - maxNumIterations: Int, - convergenceTol: Double): ArtificialNeuralNetworkModel = { + trainingRDD: RDD[(Vector,Vector)], + hiddenLayersTopology: Array[Int], + initialWeights: Vector, + maxNumIterations: Int, + convergenceTol: Double): ArtificialNeuralNetworkModel = { val topology = convertTopology(trainingRDD, hiddenLayersTopology) new ArtificialNeuralNetwork(topology, maxNumIterations, convergenceTol). run(trainingRDD, initialWeights) @@ -272,8 +279,8 @@ object ArtificialNeuralNetwork { * @return random weights vector. */ def randomWeights( - trainingRDD: RDD[(Vector,Vector)], - hiddenLayersTopology: Array[Int]): Vector = { + trainingRDD: RDD[(Vector,Vector)], + hiddenLayersTopology: Array[Int]): Vector = { val topology = convertTopology(trainingRDD, hiddenLayersTopology) return randomWeights(topology, false) } @@ -287,9 +294,9 @@ object ArtificialNeuralNetwork { * @return random weights vector. */ def randomWeights( - trainingRDD: RDD[(Vector,Vector)], - hiddenLayersTopology: Array[Int], - seed: Int): Vector = { + trainingRDD: RDD[(Vector,Vector)], + hiddenLayersTopology: Array[Int], + seed: Int): Vector = { val topology = convertTopology(trainingRDD, hiddenLayersTopology) return randomWeights(topology, true, seed) } @@ -304,17 +311,18 @@ object ArtificialNeuralNetwork { * @return random weights vector. 
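The length of the rolled weight vector, as computed by `weightCount` in the `NeuralHelper` trait below, restated:

`$weightCount = \sum_{l=1}^{L} topology(l) \cdot topology(l-1) + \sum_{l=1}^{L} topology(l)$`

where the second sum (written `topology.sum - topology(0)` in the code) adds one bias weight per non-input node.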
   */
  def randomWeights(
-    inputLayerSize: Int,
-    outputLayerSize: Int,
-    hiddenLayersTopology: Array[Int],
-    seed: Int): Vector = {
+      inputLayerSize: Int,
+      outputLayerSize: Int,
+      hiddenLayersTopology: Array[Int],
+      seed: Int): Vector = {
+
     val topology = inputLayerSize +: hiddenLayersTopology :+ outputLayerSize
     return randomWeights(topology, true, seed)
   }

   private def convertTopology(
-    input: RDD[(Vector,Vector)],
-    hiddenLayersTopology: Array[Int] ): Array[Int] = {
+      input: RDD[(Vector,Vector)],
+      hiddenLayersTopology: Array[Int] ): Array[Int] = {
     val firstElt = input.first
     firstElt._1.size +: hiddenLayersTopology :+ firstElt._2.size
   }
@@ -349,163 +357,116 @@ object ArtificialNeuralNetwork {
   }
 }
+
 /**
- * Helper methods for ANN
+ * ::Experimental::
+ * Trait for rolling/unrolling weights and for forward/back propagation in a neural network
  */
-private[ann] trait ANNHelper {
+@Experimental
+private[ann] trait NeuralHelper {
   protected val topology: Array[Int]
-  protected def g(x: Double) = 1.0 / (1.0 + math.exp(-x))
-  protected val L = topology.length - 1
-  protected val noWeights = {
-    var tmp = 0
-    var l = 1
-    while (l <= L) {
-      tmp = tmp + topology(l) * (topology(l - 1) + 1)
-      l += 1
+  protected val weightCount =
+    (for(i <- 1 until topology.size) yield (topology(i) * topology(i - 1))).sum +
+    topology.sum - topology(0)
+
+  protected def unrollWeights(weights: linalg.Vector): (Array[BDM[Double]], Array[BDM[Double]]) = {
+    require(weights.size == weightCount)
+    val weightsCopy = weights.toArray
+    val weightMatrices = new Array[BDM[Double]](topology.size)
+    var offset = 0
+    for(i <- 1 until topology.size){
+      weightMatrices(i) = new BDM[Double](topology(i), topology(i - 1), weightsCopy, offset)
+      offset += topology(i) * topology(i - 1)
     }
-    tmp
-  }
-  protected val ofsWeight: Array[Int] = {
-    val tmp = new Array[Int](L + 1)
-    var curPos = 0
-    tmp(0) = 0
-    var l = 1
-    while (l <= L) {
-      tmp(l) = curPos
-      curPos = curPos + (topology(l - 1) + 1) * topology(l)
-      l += 1
+    val bias = new Array[BDM[Double]](topology.size)
+    for(i <- 1 until topology.size){
+      /* TODO: BDM */
+      bias(i) = (new BDV[Double](weightsCopy, offset, 1, topology(i))).toDenseMatrix.t
+      offset += topology(i)
     }
-    tmp
+    (weightMatrices, bias)
   }
-  protected val noNodes: Int = {
-    var tmp: Integer = 0
-    var l = 0
-    while (l < topology.size) {
-      tmp = tmp + topology(l)
-      l += 1
+
+  protected def rollWeights(weightMatricesUpdate: Array[BDM[Double]],
+    biasUpdate: Array[BDM[Double]]) = {
+    val wu = BDV.zeros[Double](weightCount)
+    var offset = 0
+    for(i <- 1 until topology.size){
+      for(j <- 0 until weightMatricesUpdate(i).cols){
+        wu(offset until (offset + weightMatricesUpdate(i).rows)) := weightMatricesUpdate(i)(::, j)
+        offset += weightMatricesUpdate(i).rows
+      }
     }
-    tmp
-  }
-  protected val ofsNode: Array[Int] = {
-    val tmp = new Array[Int](L + 1)
-    tmp(0) = 0
-    var l = 1
-    while (l <= L) {
-      tmp(l) = tmp(l - 1) + topology(l - 1)
-      l += 1
+    for(i <- 1 until topology.size){
+      wu(offset until offset + topology(i)) := biasUpdate(i)(::, 0)
+      offset += topology(i)
     }
-    tmp
+    wu
   }
-  protected def forwardRun(arrData: Array[Double], arrWeights: Array[Double]): Array[Double] = {
-    val arrNodes = new Array[Double](noNodes)
-    var i: Int = 0
-    var j: Int = 0
-    var l: Int = 0
-    i = 0
-    while (i < topology(0)) {
-      arrNodes(i) = arrData(i)
-      i += 1
+
+  protected def forwardRun(data: BDM[Double], weightMatrices: Array[BDM[Double]],
+    bias: Array[BDM[Double]]): Array[BDM[Double]] = {
+    val outArray = new Array[BDM[Double]](topology.size)
+    outArray(0) = data
+    for(i <- 1 until topology.size) {
+      outArray(i) = weightMatrices(i) * outArray(i - 1) :+ bias(i)
+      Bsigmoid.inPlace(outArray(i))
     }
-    l = 1
-    while (l <= L) {
-      j = 0
-      while (j < topology(l)) {
-        var cum: Double = 0.0
-        i = 0
-        while (i < topology(l - 1)) {
-          cum = cum +
-            arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + i) *
-            arrNodes(ofsNode(l - 1) + i)
-          i += 1
-        }
-        cum = cum + arrWeights(ofsWeight(l) + (topology(l - 1) + 1) * j + topology(l - 1))
-        arrNodes(ofsNode(l) + j) = g(cum)
-        j += 1
+    outArray
+  }
+
+  protected def wGradient(weightMatrices: Array[BDM[Double]],
+    targetOutput: BDM[Double],
+    outputs: Array[BDM[Double]]):
+    (Array[BDM[Double]], Array[BDM[Double]]) = {
+    /* error back propagation */
+    val deltas = new Array[BDM[Double]](topology.size)
+    for(i <- (topology.size - 1) until (0, -1)){
+      /* TODO: DBM */
+      val onesVector = BDV.ones[Double](outputs(i).rows).toDenseMatrix.t
+      val outPrime = (onesVector :- outputs(i)) :* outputs(i)
+      if(i == topology.size - 1){
+        deltas(i) = (outputs(i) :- targetOutput) :* outPrime
+      }else{
+        deltas(i) = (weightMatrices(i + 1).t * deltas(i + 1)) :* outPrime
       }
-      l += 1
     }
-    arrNodes
+    /* gradient */
+    val gradientMatrices = new Array[BDM[Double]](topology.size)
+    for(i <- (topology.size - 1) until (0, -1)) {
+      gradientMatrices(i) = deltas(i) * outputs(i - 1).t
+    }
+    (gradientMatrices, deltas)
   }
 }

-private class ANNLeastSquaresGradient(val topology: Array[Int]) extends Gradient with ANNHelper {
+
+private class ANNLeastSquaresGradient(val topology: Array[Int]) extends Gradient with NeuralHelper {

   override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = {
     val arrData = data.toArray
-    val arrWeights = weights.toArray
-    var i: Int = 0
-    var j: Int = 0
-    var l: Int = 0
-    // forward run
-    val arrNodes = forwardRun(arrData, arrWeights)
-    val arrDiff = new Array[Double](topology(L))
-    j = 0
-    while (j < topology(L)) {
-      arrDiff(j) = arrNodes(ofsNode(L) + j) - arrData(topology(0) + j)
-      j += 1
-    }
-    var err: Double = 0
-    j = 0
-    while (j < topology(L)) {
-      err = err + arrDiff(j) * arrDiff(j)
-      j += 1
-    }
-    err = err * .5
-    // back propagation
-    val arrDelta = new Array[Double](noNodes)
-    j = 0
-    while (j < topology(L)) {
-      arrDelta(ofsNode(L) + j) =
-        arrDiff(j) *
-        arrNodes(ofsNode(L) + j) * (1 - arrNodes(ofsNode(L) + j))
-      j += 1
-    }
-    l = L - 1
-    while (l > 0) {
-      j = 0
-      while (j < topology(l)) {
-        var cum: Double = 0.0
-        i = 0
-        while (i < topology(l + 1)) {
-          cum = cum +
-            arrWeights(ofsWeight(l + 1) + (topology(l) + 1) * i + j) *
-            arrDelta(ofsNode(l + 1) + i) *
-            arrNodes(ofsNode(l) + j) * (1 - arrNodes(ofsNode(l) + j))
-          i += 1
-        }
-        arrDelta(ofsNode(l) + j) = cum
-        j += 1
-      }
-      l -= 1
-    }
-    // gradient
-    val arrGrad = new Array[Double](noWeights)
-    l = 1
-    while (l <= L) {
-      j = 0
-      while (j < topology(l)) {
-        i = 0
-        while (i < topology(l - 1)) {
-          arrGrad(ofsWeight(l) + (topology(l - 1) + 1) * j + i) =
-            arrNodes(ofsNode(l - 1) + i) *
-            arrDelta(ofsNode(l) + j)
-          i += 1
-        }
-        arrGrad(ofsWeight(l) + (topology(l - 1) + 1) * j + topology(l - 1)) =
-          arrDelta(ofsNode(l) + j)
-        j += 1
-      }
-      l += 1
-    }
-    (Vectors.dense(arrGrad), err)
+    val input = new BDV(arrData, 0, 1, topology(0)).toDenseMatrix.t
+    val targetVector =
+      new BDV(arrData, topology(0), 1, arrData.length - topology(0)).toDenseMatrix.t
+    val (weightMatrices, bias) = unrollWeights(weights)
+    /* forward run */
+    val outputs = forwardRun(input, weightMatrices, bias)
+    /* error back propagation */
+    val (gradientMatrices, errors) = wGradient(weightMatrices, targetVector, outputs)
+    val weightsGradient = rollWeights(gradientMatrices, errors)
+
+    /* error */
+    val delta = targetVector :- outputs(topology.size - 1)
+    val outerError = Bsum(delta :* delta) / 2
+    val result = (Vectors.fromBreeze(weightsGradient), outerError)
+    result
   }

   override def compute(
-    data: Vector,
-    label: Double,
-    weights: Vector,
-    cumGradient: Vector): Double = {
+      data: Vector,
+      label: Double,
+      weights: Vector,
+      cumGradient: Vector): Double = {
     val (grad, err) = compute(data, label, weights)
     cumGradient.toBreeze += grad.toBreeze
     err
@@ -515,11 +476,11 @@ private class ANNLeastSquaresGradient(val topology: Array[Int]) extends Gradient

 private class ANNUpdater extends Updater {

   override def compute(
-    weightsOld: Vector,
-    gradient: Vector,
-    stepSize: Double,
-    iter: Int,
-    regParam: Double): (Vector, Double) = {
+      weightsOld: Vector,
+      gradient: Vector,
+      stepSize: Double,
+      iter: Int,
+      regParam: Double): (Vector, Double) = {
     val thisIterStepSize = stepSize
     val brzWeights: BV[Double] = weightsOld.toBreeze.toDenseVector
     brzAxpy(-thisIterStepSize, gradient.toBreeze, brzWeights)
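Before the next patch, it is worth pinning down the flat weight layout the trait above commits to: for every layer i >= 1 there are topology(i) * topology(i - 1) matrix entries plus topology(i) bias entries, which is exactly what the weightCount expression adds up. A minimal standalone sketch of that arithmetic (illustrative names only, not part of the patch series):

    // Weight-count arithmetic for the flat vector consumed by unrollWeights above.
    object WeightLayoutSketch {
      def weightCount(topology: Array[Int]): Int =
        (1 until topology.length).map(i => topology(i) * topology(i - 1)).sum +
          topology.sum - topology(0)

      def main(args: Array[String]): Unit = {
        // 4 inputs, one hidden layer of 5 nodes, 3 outputs:
        // 4*5 + 5*3 = 35 matrix entries and 5 + 3 = 8 bias entries, 43 in total
        val topology = Array(4, 5, 3)
        println(weightCount(topology)) // prints 43
      }
    }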
From 62b1d91e982f591f45441836dc3c15e3c4a8454c Mon Sep 17 00:00:00 2001
From: Alexander Ulanov
Date: Fri, 19 Dec 2014 13:31:52 -0800
Subject: [PATCH 135/143] Fix of broken gradient test

---
 .../mllib/ann/ArtificialNeuralNetwork.scala   |  1 -
 .../org/apache/spark/mllib/ann/ANNSuite.scala | 24 ++++++++++---------
 2 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
index d32f79a237606..9cbff328aebe2 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
@@ -454,7 +454,6 @@ private class ANNLeastSquaresGradient(val topology: Array[Int]) extends Gradient
     /* error back propagation */
     val (gradientMatrices, errors) = wGradient(weightMatrices, targetVector, outputs)
     val weightsGradient = rollWeights(gradientMatrices, errors)
-
     /* error */
     val delta = targetVector :- outputs(topology.size - 1)
     val outerError = Bsum(delta :* delta) / 2
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala
index 75bd3d11c36d9..e321837dfef0d 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala
@@ -17,9 +17,10 @@

 package org.apache.spark.mllib.ann

-import org.apache.spark.mllib.linalg.Vectors
+import org.apache.spark.mllib.linalg.{DenseVector, Vectors, Vector}
 import org.apache.spark.mllib.util.MLlibTestSparkContext
 import org.apache.spark.util.random.XORShiftRandom
+import breeze.linalg.{DenseVector => BDV}

 import org.scalatest.FunSuite

@@ -55,15 +56,15 @@ class ANNSuite extends FunSuite with MLlibTestSparkContext {
     val rnd = new XORShiftRandom(0)
     var cnt = 0
     while( cnt
Date: Fri, 19 Dec 2014 17:06:52 -0800
Subject: [PATCH 136/143] Roll/unroll ordering, weight by layer function

---
 .../mllib/ann/ArtificialNeuralNetwork.scala | 37 +++++++++++++++----
 1 file changed, 29 insertions(+), 8 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
index 9cbff328aebe2..90dd1b1dd184a 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
@@ -97,7 +97,7 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol
    * @return prediction using the trained model.
    */
   def predict(testData: Vector): Vector = {
-    Vectors.dense(computeValues(testData))
+    Vectors.dense(computeValues(testData, topology.length - 1))
   }

   /**
@@ -110,10 +110,35 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol
     testDataRDD.map(T => (T, predict(T)) )
   }

-  private def computeValues(testData: Vector): Array[Double] = {
+  private def computeValues(testData: Vector, layer: Int): Array[Double] = {
+    require(layer >=0 && layer < topology.length)
     /* TODO: BDM */
     val outputs = forwardRun(testData.toBreeze.toDenseVector.toDenseMatrix.t, weightMatrices, bias)
-    outputs(topology.length - 1).toArray
+    outputs(layer).toArray
+  }
+
+  /**
+   * Returns output values of a given layer for a single data point using the trained model.
+   *
+   * @param testData RDD represents a single data point.
+   * @param layer index of a network layer
+   * @return output of a given layer.
+   */
+  def output(testData: Vector, layer: Int): Vector = {
+    Vectors.dense(computeValues(testData, layer))
+  }
+
+  /**
+   * Returns weights for a given layer in vector form.
+   *
+   * @param index index of a layer: ranges from 1 until topology.length.
+   *              (no weights for the 0 layer)
+   * @return weights.
+   */
+  def weightsByLayer(index: Int): Vector = {
+    require(index > 0 && index < topology.length)
+    val layerWeight = BDV.vertcat(weightMatrices(index).toDenseVector, bias(index).toDenseVector)
+    Vectors.dense(layerWeight.toArray)
   }
 }

@@ -373,13 +398,11 @@ private[ann] trait NeuralHelper {
     require(weights.size == weightCount)
     val weightsCopy = weights.toArray
     val weightMatrices = new Array[BDM[Double]](topology.size)
+    val bias = new Array[BDM[Double]](topology.size)
     var offset = 0
     for(i <- 1 until topology.size){
       weightMatrices(i) = new BDM[Double](topology(i), topology(i - 1), weightsCopy, offset)
       offset += topology(i) * topology(i - 1)
-    }
-    val bias = new Array[BDM[Double]](topology.size)
-    for(i <- 1 until topology.size){
       /* TODO: BDM */
       bias(i) = (new BDV[Double](weightsCopy, offset, 1, topology(i))).toDenseMatrix.t
       offset += topology(i)
@@ -396,8 +419,6 @@ private[ann] trait NeuralHelper {
         wu(offset until (offset + weightMatricesUpdate(i).rows)) := weightMatricesUpdate(i)(::, j)
         offset += weightMatricesUpdate(i).rows
       }
-    }
-    for(i <- 1 until topology.size){
       wu(offset until offset + topology(i)) := biasUpdate(i)(::, 0)
       offset += topology(i)
     }
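The patch above turns the model from a black box into something inspectable: output(testData, layer) exposes any layer's activations, and weightsByLayer(index) returns one layer's weights and biases as a single vector. It also merges the unroll/roll loops, so the flat vector is now laid out per layer (matrix then bias) instead of all matrices followed by all biases. A hedged, spark-shell-style usage sketch, assuming `model` is an already-trained ArtificialNeuralNetworkModel with topology Array(2, 5, 1):

    import org.apache.spark.mllib.linalg.Vectors

    val x = Vectors.dense(0.0, 1.0)
    val hidden = model.output(x, 1)       // sigmoid activations of the 5 hidden nodes
    val out = model.output(x, 2)          // same values as model.predict(x)
    val wHidden = model.weightsByLayer(1) // 2*5 matrix entries followed by 5 biases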
From 2fb67f613aad0bccd97c91548c734507fbeb58d7 Mon Sep 17 00:00:00 2001
From: Alexander Ulanov
Date: Mon, 22 Dec 2014 16:20:11 -0800
Subject: [PATCH 137/143] Roll and cumulative update optimizations

---
 .../mllib/ann/ArtificialNeuralNetwork.scala | 53 ++++++++++---------
 1 file changed, 28 insertions(+), 25 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
index 90dd1b1dd184a..acde45f30733c 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
@@ -23,7 +23,7 @@
 import breeze.numerics.{sigmoid => Bsigmoid}

 import org.apache.spark.annotation.Experimental
 import org.apache.spark.mllib.linalg
-import org.apache.spark.mllib.linalg.{Vector, Vectors}
+import org.apache.spark.mllib.linalg.{DenseVector, Vector, Vectors}
 import org.apache.spark.mllib.optimization._
 import org.apache.spark.rdd.RDD
 import org.apache.spark.util.random.XORShiftRandom
@@ -411,18 +411,25 @@ private[ann] trait NeuralHelper {
   }

   protected def rollWeights(weightMatricesUpdate: Array[BDM[Double]],
-    biasUpdate: Array[BDM[Double]]) = {
-    val wu = BDV.zeros[Double](weightCount)
+    biasUpdate: Array[BDM[Double]],
+    cumGradient: Vector): Unit = {
+    val wu = cumGradient.toArray
     var offset = 0
-    for(i <- 1 until topology.size){
-      for(j <- 0 until weightMatricesUpdate(i).cols){
-        wu(offset until (offset + weightMatricesUpdate(i).rows)) := weightMatricesUpdate(i)(::, j)
-        offset += weightMatricesUpdate(i).rows
+    for(i <- 1 until topology.length){
+      var k = 0
+      val numElements = topology(i) * topology(i - 1)
+      while(k < numElements){
+        wu(offset + k) += weightMatricesUpdate(i).data(k)
+        k += 1
+      }
+      offset += numElements
+      k = 0
+      while(k < topology(i)){
+        wu(offset + k) += biasUpdate(i).data(k)
+        k += 1
       }
-      wu(offset until offset + topology(i)) := biasUpdate(i)(::, 0)
       offset += topology(i)
     }
-    wu
   }

   protected def forwardRun(data: BDM[Double], weightMatrices: Array[BDM[Double]],
@@ -465,6 +472,13 @@ private[ann] trait NeuralHelper {

 private class ANNLeastSquaresGradient(val topology: Array[Int]) extends Gradient with NeuralHelper {

   override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = {
+    val gradient = Vectors.zeros(weights.size)
+    val loss = compute(data, label, weights, gradient)
+    (gradient, loss)
+  }
+
+  override def compute(data: Vector, label: Double, weights: Vector,
+    cumGradient: Vector): Double = {
     val arrData = data.toArray
     val input = new BDV(arrData, 0, 1, topology(0)).toDenseMatrix.t
     val targetVector =
@@ -473,23 +487,12 @@ private class ANNLeastSquaresGradient(val topology: Array[Int]) extends Gradient
     /* forward run */
     val outputs = forwardRun(input, weightMatrices, bias)
     /* error back propagation */
-    val (gradientMatrices, errors) = wGradient(weightMatrices, targetVector, outputs)
-    val weightsGradient = rollWeights(gradientMatrices, errors)
+    val (gradientMatrices, deltas) = wGradient(weightMatrices, targetVector, outputs)
+    rollWeights(gradientMatrices, deltas, cumGradient)
     /* error */
-    val delta = targetVector :- outputs(topology.size - 1)
-    val outerError = Bsum(delta :* delta) / 2
-    val result = (Vectors.fromBreeze(weightsGradient), outerError)
-    result
-  }
-
-  override def compute(
-    data: Vector,
-    label: Double,
-    weights: Vector,
-    cumGradient: Vector): Double = {
-    val (grad, err) = compute(data, label, weights)
-    cumGradient.toBreeze += grad.toBreeze
-    err
+    val diff = targetVector :- outputs(topology.size - 1)
+    val outerError = Bsum(diff :* diff) / 2
+    outerError
   }
 }
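The optimization above avoids allocating and summing a separate gradient vector per data point: rollWeights now adds each layer's update straight into the array behind cumGradient. The sketch below shows the accumulation idiom under the assumption that the cumulative gradient is dense (for Spark's DenseVector, toArray hands back the backing array, so in-place writes reach the caller; a sparse cumGradient would silently lose them):

    // Accumulate one layer's unrolled gradient block into the shared dense array.
    def accumulate(cum: Array[Double], update: Array[Double], offset: Int): Unit = {
      var k = 0
      while (k < update.length) {
        cum(offset + k) += update(k)
        k += 1
      }
    }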
From 799b27783d18662ae6dc22e4f5593a92b742d65f Mon Sep 17 00:00:00 2001
From: lizhengbing
Date: Sat, 27 Dec 2014 13:43:53 +0800
Subject: [PATCH 138/143] Update ANNSuite.scala

Add comments for test("Gradient of ANN")
---
 .../org/apache/spark/mllib/ann/ANNSuite.scala | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala
index e321837dfef0d..252fab7f94fd7 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala
@@ -44,7 +44,19 @@ class ANNSuite extends FunSuite with MLlibTestSparkContext {
       (model.predict(input)(0), label(0))
     }.collect()
     assert(predictionAndLabels.forall { case(p, l) => (math.round(p) - l) == 0 })
   }
-
+
+  /*
+    This test compares the output of the annGradient.compute function with the following
+    approximations:
+
+    dE / dw_k ~= ( E(w + eps*e_k, x) - E(w, x) ) / eps
+
+    where E(w, x) is the summed squared error multiplied by a factor 0.5, given weight vector w
+    and input x, w_k the k-th element in the weight vector (starting with k=0) and e_k the
+    associated k-th cartesian unit vector.
+
+    The test is passed when the difference is less than accept=1e-7 with eps=1e-6.
+  */
   test("Gradient of ANN") {
     val eps = 1e-6
     val accept = 1e-7
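The comment added above fully specifies the check; as a self-contained illustration, here is the same forward-difference recipe applied to a toy loss (assumed names and values; the real test perturbs the ANN weight vector and compares against annGradient.compute):

    object GradientCheckSketch {
      // dE/dw_k ~= (E(w + eps*e_k) - E(w)) / eps, one coordinate at a time
      def numericalGradient(loss: Array[Double] => Double,
          w: Array[Double], eps: Double = 1e-6): Array[Double] = {
        val base = loss(w)
        val grad = new Array[Double](w.length)
        for (k <- w.indices) {
          val wPlus = w.clone()
          wPlus(k) += eps
          grad(k) = (loss(wPlus) - base) / eps
        }
        grad
      }

      def main(args: Array[String]): Unit = {
        // E(w) = 0.5*((w0 - 1)^2 + (w1 + 2)^2) has analytic gradient (w0 - 1, w1 + 2)
        val loss = (w: Array[Double]) =>
          0.5 * ((w(0) - 1) * (w(0) - 1) + (w(1) + 2) * (w(1) + 2))
        val w = Array(0.3, -0.7)
        val analytic = Array(w(0) - 1, w(1) + 2)
        val numeric = numericalGradient(loss, w)
        analytic.zip(numeric).foreach { case (a, n) => assert(math.abs(a - n) < 1e-5) }
      }
    }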
From 6166ad92a358f451699cd2f3caf4a093635e1c0e Mon Sep 17 00:00:00 2001
From: Alexander Ulanov
Date: Fri, 9 Jan 2015 17:35:16 -0800
Subject: [PATCH 139/143] Batch ANN

---
 .../mllib/ann/ArtificialNeuralNetwork.scala   | 78 ++++++++++++++-----
 .../org/apache/spark/mllib/ann/ANNSuite.scala |  2 +-
 2 files changed, 59 insertions(+), 21 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
index acde45f30733c..b3b6a8b075137 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
@@ -18,12 +18,12 @@
 package org.apache.spark.mllib.ann

 import breeze.linalg.{axpy => brzAxpy, Vector => BV, DenseVector => BDV,
-DenseMatrix => BDM, sum => Bsum, argmax => Bargmax, norm => Bnorm}
+DenseMatrix => BDM, sum => Bsum, argmax => Bargmax, norm => Bnorm, *}
 import breeze.numerics.{sigmoid => Bsigmoid}

 import org.apache.spark.annotation.Experimental
 import org.apache.spark.mllib.linalg
-import org.apache.spark.mllib.linalg.{DenseVector, Vector, Vectors}
+import org.apache.spark.mllib.linalg.{DenseMatrix, DenseVector, Vector, Vectors}
 import org.apache.spark.mllib.optimization._
 import org.apache.spark.rdd.RDD
 import org.apache.spark.util.random.XORShiftRandom
@@ -153,10 +153,11 @@ class ArtificialNeuralNetworkModel private[mllib](val weights: Vector, val topol
 class ArtificialNeuralNetwork private[mllib](
     topology: Array[Int],
     maxNumIterations: Int,
-    convergenceTol: Double)
+    convergenceTol: Double,
+    batchSize: Int = 1)
   extends Serializable {

-  private val gradient = new ANNLeastSquaresGradient(topology)
+  private val gradient = new ANNLeastSquaresGradient(topology, batchSize)
   private val updater = new ANNUpdater()
   private val optimizer = new LBFGS(gradient, updater).
     setConvergenceTol(convergenceTol).
@@ -172,12 +173,28 @@ class ArtificialNeuralNetwork private[mllib](
    */
   private def run(trainingRDD: RDD[(Vector, Vector)],
     initialWeights: Vector): ArtificialNeuralNetworkModel = {
-    val data = trainingRDD.map(v =>
-      (0.0,
-        Vectors.fromBreeze(BDV.vertcat(
-          v._1.toBreeze.toDenseVector,
-          v._2.toBreeze.toDenseVector))
-      ))
+    val data = if (batchSize == 1) {
+      trainingRDD.map(v =>
+        (0.0,
+          Vectors.fromBreeze(BDV.vertcat(
+            v._1.toBreeze.toDenseVector,
+            v._2.toBreeze.toDenseVector))
+        ))
+    } else { trainingRDD.mapPartitions { it =>
+      it.grouped(batchSize).map { seq =>
+        val size = seq.size
+        val bigVector = new Array[Double](topology(0) * size + topology.last * size)
+        var i = 0
+        seq.foreach { case (in, out) =>
+          System.arraycopy(in.toArray, 0, bigVector, i * topology(0), topology(0))
+          System.arraycopy(out.toArray, 0, bigVector,
+            topology(0) * size + i * topology.last, topology.last)
+          i += 1
+        }
+        (0.0, Vectors.dense(bigVector))
+      }
+    }
+    }
     val weights = optimizer.optimize(data, initialWeights)
     new ArtificialNeuralNetworkModel(weights, topology)
   }
@@ -190,6 +207,21 @@ object ArtificialNeuralNetwork {

   private val defaultTolerance: Double = 1e-4

+
+  def train(trainingRDD: RDD[(Vector, Vector)], batchSize: Int, hiddenLayersTopology: Array[Int],
+    initialWeights: Vector, maxNumIterations: Int) : ArtificialNeuralNetworkModel = {
+    val topology = convertTopology(trainingRDD, hiddenLayersTopology)
+    new ArtificialNeuralNetwork(topology, maxNumIterations, defaultTolerance, batchSize).
+      run(trainingRDD, initialWeights)
+  }
+
+  def train(trainingRDD: RDD[(Vector, Vector)], batchSize: Int, hiddenLayersTopology: Array[Int],
+    maxNumIterations: Int) : ArtificialNeuralNetworkModel = {
+    val topology = convertTopology(trainingRDD, hiddenLayersTopology)
+    new ArtificialNeuralNetwork(topology, maxNumIterations, defaultTolerance, batchSize).
+      run(trainingRDD, randomWeights(topology, false))
+  }
+
   /**
    * Trains an ANN.
    * Uses default convergence tolerance 1e-4 for LBFGS.
@@ -437,7 +469,8 @@ private[ann] trait NeuralHelper {
     val outArray = new Array[BDM[Double]](topology.size)
     outArray(0) = data
     for(i <- 1 until topology.size) {
-      outArray(i) = weightMatrices(i) * outArray(i - 1) :+ bias(i)
+      outArray(i) = weightMatrices(i) * outArray(i - 1)// :+ bias(i))
+      outArray(i)(::, *) :+= bias(i).toDenseVector
       Bsigmoid.inPlace(outArray(i))
     }
     outArray
@@ -450,9 +483,10 @@ private[ann] trait NeuralHelper {
     /* error back propagation */
     val deltas = new Array[BDM[Double]](topology.size)
     for(i <- (topology.size - 1) until (0, -1)){
-      /* TODO: DBM */
-      val onesVector = BDV.ones[Double](outputs(i).rows).toDenseMatrix.t
-      val outPrime = (onesVector :- outputs(i)) :* outputs(i)
+      /* TODO: GEMM? */
+      val outPrime = BDM.ones[Double](outputs(i).rows, outputs(i).cols)
+      outPrime :-= outputs(i)
+      outPrime :*= outputs(i)
       if(i == topology.size - 1){
         deltas(i) = (outputs(i) :- targetOutput) :* outPrime
       }else{
@@ -462,14 +496,17 @@ private[ann] trait NeuralHelper {
     /* gradient */
     val gradientMatrices = new Array[BDM[Double]](topology.size)
     for(i <- (topology.size - 1) until (0, -1)) {
+      /* TODO: GEMM? */
       gradientMatrices(i) = deltas(i) * outputs(i - 1).t
+      gradientMatrices(i) :*= outputs(i).cols.toDouble
     }
     (gradientMatrices, deltas)
   }
 }

-private class ANNLeastSquaresGradient(val topology: Array[Int]) extends Gradient with NeuralHelper {
+private class ANNLeastSquaresGradient(val topology: Array[Int],
+  val batchSize: Int = 1) extends Gradient with NeuralHelper {

   override def compute(data: Vector, label: Double, weights: Vector): (Vector, Double) = {
     val gradient = Vectors.zeros(weights.size)
@@ -480,17 +517,18 @@ private class ANNLeastSquaresGradient(val topology: Array[Int]) extends Gradient
   override def compute(data: Vector, label: Double, weights: Vector,
     cumGradient: Vector): Double = {
     val arrData = data.toArray
-    val input = new BDV(arrData, 0, 1, topology(0)).toDenseMatrix.t
-    val targetVector =
-      new BDV(arrData, topology(0), 1, arrData.length - topology(0)).toDenseMatrix.t
+    val realBatchSize = arrData.length / (topology(0) + topology.last)
+    val input = new BDM(topology(0), realBatchSize, arrData)
+    val target = new BDM(topology.last, realBatchSize, arrData, topology(0) * realBatchSize)
     val (weightMatrices, bias) = unrollWeights(weights)
     /* forward run */
     val outputs = forwardRun(input, weightMatrices, bias)
     /* error back propagation */
-    val (gradientMatrices, deltas) = wGradient(weightMatrices, targetVector, outputs)
+    val (gradientMatrices, deltas) = wGradient(weightMatrices, target, outputs)
     rollWeights(gradientMatrices, deltas, cumGradient)
     /* error */
-    val diff = targetVector :- outputs(topology.size - 1)
+    val diff = target :- outputs(topology.size - 1)
+    /* TODO: Check if it is OK in matrix mode */
     val outerError = Bsum(diff :* diff) / 2
     outerError
   }
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala
index 252fab7f94fd7..6847a76efe9b4 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala
@@ -39,7 +39,7 @@ class ANNSuite extends FunSuite with MLlibTestSparkContext {
     val rddData = sc.parallelize(data, 2)
     val hiddenLayersTopology = Array[Int](5)
     val initialWeights = ArtificialNeuralNetwork.randomWeights(rddData, hiddenLayersTopology, 0x01234567)
-    val model = ArtificialNeuralNetwork.train(rddData, hiddenLayersTopology, initialWeights, 200)
+    val model = ArtificialNeuralNetwork.train(rddData, 4, hiddenLayersTopology, initialWeights, 200)
     val predictionAndLabels = rddData.map { case(input, label) =>
       (model.predict(input)(0), label(0))
     }.collect()
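The batch layout introduced above is worth spelling out, since the gradient code depends on it: all inputs of a batch are packed column by column, followed by all outputs, and ANNLeastSquaresGradient later reinterprets the flat array as two column-major matrices whose column count is the batch size, so one matrix-matrix product replaces batchSize matrix-vector products. A standalone sketch of the packing (illustrative names only):

    object BatchPackingSketch {
      def pack(batch: Seq[(Array[Double], Array[Double])], nIn: Int, nOut: Int): Array[Double] = {
        val size = batch.size
        val big = new Array[Double](nIn * size + nOut * size)
        var i = 0
        batch.foreach { case (in, out) =>
          System.arraycopy(in, 0, big, i * nIn, nIn)
          System.arraycopy(out, 0, big, nIn * size + i * nOut, nOut)
          i += 1
        }
        big
      }

      def main(args: Array[String]): Unit = {
        val batch = Seq((Array(1.0, 2.0), Array(3.0)), (Array(4.0, 5.0), Array(6.0)))
        // inputs first, column by column, then outputs: 1 2 4 5 | 3 6
        assert(pack(batch, 2, 1).sameElements(Array(1.0, 2.0, 4.0, 5.0, 3.0, 6.0)))
      }
    }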
From 5205fda2b2c073bd7a72c12d0b8716a917f1d26e Mon Sep 17 00:00:00 2001
From: Alexander Ulanov
Date: Mon, 12 Jan 2015 11:51:03 -0800
Subject: [PATCH 140/143] ANN Classifier batch

---
 .../spark/mllib/ann/ArtificialNeuralNetwork.scala    |  5 +++--
 .../spark/mllib/classification/ANNClassifier.scala   | 13 +++++++++++--
 .../scala/org/apache/spark/mllib/ann/ANNSuite.scala  |  6 ++++--
 3 files changed, 18 insertions(+), 6 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
index b3b6a8b075137..9931efed1f04b 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
@@ -209,9 +209,10 @@ object ArtificialNeuralNetwork {

   def train(trainingRDD: RDD[(Vector, Vector)], batchSize: Int, hiddenLayersTopology: Array[Int],
-    initialWeights: Vector, maxNumIterations: Int) : ArtificialNeuralNetworkModel = {
+    initialWeights: Vector, maxNumIterations: Int,
+    convergenceTol: Double) : ArtificialNeuralNetworkModel = {
     val topology = convertTopology(trainingRDD, hiddenLayersTopology)
-    new ArtificialNeuralNetwork(topology, maxNumIterations, defaultTolerance, batchSize).
+    new ArtificialNeuralNetwork(topology, maxNumIterations, convergenceTol, batchSize).
       run(trainingRDD, initialWeights)
   }

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala
index 3a0537bf91888..3c4e1414fa0d9 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala
@@ -75,10 +75,10 @@ class ANNClassifier private(val labelToIndex: Map[Double, Int],
     private val convergeTol: Double) extends ANNClassifierHelper with Serializable {

-  def run(data: RDD[LabeledPoint]): ANNClassifierModel = {
+  def run(data: RDD[LabeledPoint], batchSize: Int = 1): ANNClassifierModel = {
     val annData = data.map(lp => labeledPointToVectorPair(lp))
     /* train the model */
-    val model = ArtificialNeuralNetwork.train(annData, hiddenLayersTopology,
+    val model = ArtificialNeuralNetwork.train(annData, batchSize, hiddenLayersTopology,
       initialWeights, maxIterations, convergeTol)
     new ANNClassifierModel(model, labelToIndex)
   }
@@ -89,6 +89,15 @@ class ANNClassifier private(val labelToIndex: Map[Double, Int],
  */
 object ANNClassifier {

+
+  def train(data: RDD[LabeledPoint], batchSize: Int, hiddenLayersTopology: Array[Int],
+    maxIterations: Int, convergenceTol: Double): ANNClassifierModel = {
+    val initialWeights = randomWeights(data, hiddenLayersTopology)
+    val labelToIndex = data.map( lp => lp.label).distinct().collect().sorted.zipWithIndex.toMap
+    new ANNClassifier(labelToIndex, hiddenLayersTopology,
+      initialWeights, maxIterations, 1.0, convergenceTol).run(data, batchSize)
+  }
+
   /**
    * Trains an ANN classifier.
    *
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala
index 6847a76efe9b4..2bccdc09f841a 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/ann/ANNSuite.scala
@@ -38,8 +38,10 @@ class ANNSuite extends FunSuite with MLlibTestSparkContext {
       (Vectors.dense(features), Vectors.dense(Array(label)))}
     val rddData = sc.parallelize(data, 2)
     val hiddenLayersTopology = Array[Int](5)
-    val initialWeights = ArtificialNeuralNetwork.randomWeights(rddData, hiddenLayersTopology, 0x01234567)
-    val model = ArtificialNeuralNetwork.train(rddData, 4, hiddenLayersTopology, initialWeights, 200)
+    val initialWeights = ArtificialNeuralNetwork.
+      randomWeights(rddData, hiddenLayersTopology, 0x01234567)
+    val model = ArtificialNeuralNetwork.
+      train(rddData, 4, hiddenLayersTopology, initialWeights, 200, 1e-4)
     val predictionAndLabels = rddData.map { case(input, label) =>
       (model.predict(input)(0), label(0))
     }.collect()
     assert(predictionAndLabels.forall { case(p, l) => (math.round(p) - l) == 0 })
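A hedged sketch of how the batched classifier entry point added above might be called; trainingData stands for an assumed RDD[LabeledPoint] and the numbers are illustrative. Grouping ten instances per gradient call lets backpropagation run as matrix-matrix products instead of ten separate matrix-vector products:

    import org.apache.spark.mllib.classification.ANNClassifier

    // (data, batchSize, hiddenLayersTopology, maxIterations, convergenceTol)
    val model = ANNClassifier.train(trainingData, 10, Array(5), 200, 1e-4)
    val predictions = trainingData.map(lp => model.predict(lp.features))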
From e660ee8e6ba61c2186daeceb7f0e470c2a13b4a6 Mon Sep 17 00:00:00 2001
From: Alexander Ulanov
Date: Tue, 13 Jan 2015 16:07:29 -0800
Subject: [PATCH 141/143] Divisor fix, train interfaces

---
 .../mllib/ann/ArtificialNeuralNetwork.scala   | 111 +++++++++---------
 .../mllib/classification/ANNClassifier.scala  |  78 +++++++++---
 .../classification/ANNClassifierSuite.scala   |   2 +-
 3 files changed, 119 insertions(+), 72 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
index 9931efed1f04b..a6980a9258f22 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
@@ -173,6 +173,7 @@ class ArtificialNeuralNetwork private[mllib](
    */
   private def run(trainingRDD: RDD[(Vector, Vector)],
     initialWeights: Vector): ArtificialNeuralNetworkModel = {
+    val t = System.currentTimeMillis()
     val data = if (batchSize == 1) {
       trainingRDD.map(v =>
         (0.0,
@@ -208,15 +209,20 @@ object ArtificialNeuralNetwork {

   private val defaultTolerance: Double = 1e-4

-  def train(trainingRDD: RDD[(Vector, Vector)], batchSize: Int, hiddenLayersTopology: Array[Int],
-    initialWeights: Vector, maxNumIterations: Int,
+  def train(trainingRDD: RDD[(Vector, Vector)],
+    batchSize: Int,
+    hiddenLayersTopology: Array[Int],
+    initialWeights: Vector,
+    maxNumIterations: Int,
     convergenceTol: Double) : ArtificialNeuralNetworkModel = {
     val topology = convertTopology(trainingRDD, hiddenLayersTopology)
     new ArtificialNeuralNetwork(topology, maxNumIterations, convergenceTol, batchSize).
       run(trainingRDD, initialWeights)
   }

-  def train(trainingRDD: RDD[(Vector, Vector)], batchSize: Int, hiddenLayersTopology: Array[Int],
+  def train(trainingRDD: RDD[(Vector, Vector)],
+    batchSize: Int,
+    hiddenLayersTopology: Array[Int],
     maxNumIterations: Int) : ArtificialNeuralNetworkModel = {
     val topology = convertTopology(trainingRDD, hiddenLayersTopology)
     new ArtificialNeuralNetwork(topology, maxNumIterations, defaultTolerance, batchSize).
@@ -232,10 +238,9 @@ object ArtificialNeuralNetwork {
    * @param maxNumIterations specifies maximum number of training iterations.
    * @return ANN model.
    */
-  def train(
-    trainingRDD: RDD[(Vector, Vector)],
-    hiddenLayersTopology: Array[Int],
-    maxNumIterations: Int): ArtificialNeuralNetworkModel = {
+  def train(trainingRDD: RDD[(Vector, Vector)],
+    hiddenLayersTopology: Array[Int],
+    maxNumIterations: Int): ArtificialNeuralNetworkModel = {
     train(trainingRDD, hiddenLayersTopology, maxNumIterations, defaultTolerance)
   }

@@ -248,10 +253,9 @@ object ArtificialNeuralNetwork {
    * @param maxNumIterations maximum number of training iterations.
    * @return ANN model.
    */
-  def train(
-    trainingRDD: RDD[(Vector,Vector)],
-    model: ArtificialNeuralNetworkModel,
-    maxNumIterations: Int): ArtificialNeuralNetworkModel = {
+  def train(trainingRDD: RDD[(Vector,Vector)],
+    model: ArtificialNeuralNetworkModel,
+    maxNumIterations: Int): ArtificialNeuralNetworkModel = {
     train(trainingRDD, model, maxNumIterations, defaultTolerance)
   }

@@ -264,11 +268,10 @@ object ArtificialNeuralNetwork {
    * @param maxNumIterations maximum number of training iterations.
    * @return ANN model.
    */
-  def train(
-    trainingRDD: RDD[(Vector,Vector)],
-    hiddenLayersTopology: Array[Int],
-    initialWeights: Vector,
-    maxNumIterations: Int): ArtificialNeuralNetworkModel = {
+  def train(trainingRDD: RDD[(Vector,Vector)],
+    hiddenLayersTopology: Array[Int],
+    initialWeights: Vector,
+    maxNumIterations: Int): ArtificialNeuralNetworkModel = {
     train(trainingRDD, hiddenLayersTopology, initialWeights, maxNumIterations, defaultTolerance)
   }

@@ -281,11 +284,10 @@ object ArtificialNeuralNetwork {
    * @param convergenceTol convergence tolerance for LBFGS. Smaller value for closer convergence.
    * @return ANN model.
    */
-  def train(
-    trainingRDD: RDD[(Vector,Vector)],
-    model: ArtificialNeuralNetworkModel,
-    maxNumIterations: Int,
-    convergenceTol: Double): ArtificialNeuralNetworkModel = {
+  def train(trainingRDD: RDD[(Vector,Vector)],
+    model: ArtificialNeuralNetworkModel,
+    maxNumIterations: Int,
+    convergenceTol: Double): ArtificialNeuralNetworkModel = {
     new ArtificialNeuralNetwork(model.topology, maxNumIterations, convergenceTol).
       run(trainingRDD, model.weights)
   }

@@ -299,11 +301,10 @@ object ArtificialNeuralNetwork {
    * @param convergenceTol convergence tolerance for LBFGS. Smaller value for closer convergence.
    * @return ANN model.
    */
-  def train(
-    trainingRDD: RDD[(Vector, Vector)],
-    hiddenLayersTopology: Array[Int],
-    maxNumIterations: Int,
-    convergenceTol: Double): ArtificialNeuralNetworkModel = {
+  def train(trainingRDD: RDD[(Vector, Vector)],
+    hiddenLayersTopology: Array[Int],
+    maxNumIterations: Int,
+    convergenceTol: Double): ArtificialNeuralNetworkModel = {
     val topology = convertTopology(trainingRDD, hiddenLayersTopology)
     new ArtificialNeuralNetwork(topology, maxNumIterations, convergenceTol).
       run(trainingRDD, randomWeights(topology, false))
   }

@@ -318,12 +319,11 @@ object ArtificialNeuralNetwork {
    * @param convergenceTol convergence tolerance for LBFGS. Smaller value for closer convergence.
    * @return ANN model.
    */
-  def train(
-    trainingRDD: RDD[(Vector,Vector)],
-    hiddenLayersTopology: Array[Int],
-    initialWeights: Vector,
-    maxNumIterations: Int,
-    convergenceTol: Double): ArtificialNeuralNetworkModel = {
+  def train(trainingRDD: RDD[(Vector,Vector)],
+    hiddenLayersTopology: Array[Int],
+    initialWeights: Vector,
+    maxNumIterations: Int,
+    convergenceTol: Double): ArtificialNeuralNetworkModel = {
     val topology = convertTopology(trainingRDD, hiddenLayersTopology)
     new ArtificialNeuralNetwork(topology, maxNumIterations, convergenceTol).
       run(trainingRDD, initialWeights)
   }

@@ -336,9 +336,8 @@ object ArtificialNeuralNetwork {
    * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes.
    * @return random weights vector.
    */
-  def randomWeights(
-    trainingRDD: RDD[(Vector,Vector)],
-    hiddenLayersTopology: Array[Int]): Vector = {
+  def randomWeights(trainingRDD: RDD[(Vector,Vector)],
+    hiddenLayersTopology: Array[Int]): Vector = {
     val topology = convertTopology(trainingRDD, hiddenLayersTopology)
     return randomWeights(topology, false)
   }

@@ -351,10 +350,9 @@ object ArtificialNeuralNetwork {
    * @param seed random generator seed.
    * @return random weights vector.
    */
-  def randomWeights(
-    trainingRDD: RDD[(Vector,Vector)],
-    hiddenLayersTopology: Array[Int],
-    seed: Int): Vector = {
+  def randomWeights(trainingRDD: RDD[(Vector,Vector)],
+    hiddenLayersTopology: Array[Int],
+    seed: Int): Vector = {
     val topology = convertTopology(trainingRDD, hiddenLayersTopology)
     return randomWeights(topology, true, seed)
   }

@@ -368,19 +366,16 @@ object ArtificialNeuralNetwork {
    * @param seed random generator seed.
    * @return random weights vector.
    */
-  def randomWeights(
-    inputLayerSize: Int,
-    outputLayerSize: Int,
-    hiddenLayersTopology: Array[Int],
-    seed: Int): Vector = {
-
+  def randomWeights(inputLayerSize: Int,
+    outputLayerSize: Int,
+    hiddenLayersTopology: Array[Int],
+    seed: Int): Vector = {
     val topology = inputLayerSize +: hiddenLayersTopology :+ outputLayerSize
     return randomWeights(topology, true, seed)
   }

-  private def convertTopology(
-    input: RDD[(Vector,Vector)],
-    hiddenLayersTopology: Array[Int] ): Array[Int] = {
+  private def convertTopology(input: RDD[(Vector,Vector)],
+    hiddenLayersTopology: Array[Int] ): Array[Int] = {
     val firstElt = input.first
     firstElt._1.size +: hiddenLayersTopology :+ firstElt._2.size
   }

@@ -499,7 +494,9 @@ private[ann] trait NeuralHelper {
     for(i <- (topology.size - 1) until (0, -1)) {
       /* TODO: GEMM? */
       gradientMatrices(i) = deltas(i) * outputs(i - 1).t
-      gradientMatrices(i) :*= outputs(i).cols.toDouble
+      /* NB! dividing by the number of instances in
+       * the batch to be transparent for the optimizer */
+      gradientMatrices(i) :/= outputs(i).cols.toDouble
     }
     (gradientMatrices, deltas)
   }
 }

@@ -529,20 +526,20 @@ private class ANNLeastSquaresGradient(val topology: Array[Int],
     rollWeights(gradientMatrices, deltas, cumGradient)
     /* error */
     val diff = target :- outputs(topology.size - 1)
-    /* TODO: Check if it is OK in matrix mode */
     val outerError = Bsum(diff :* diff) / 2
-    outerError
+    /* NB! dividing by the number of instances in
+     * the batch to be transparent for the optimizer */
+    outerError / realBatchSize
   }
 }

 private class ANNUpdater extends Updater {

-  override def compute(
-    weightsOld: Vector,
-    gradient: Vector,
-    stepSize: Double,
-    iter: Int,
-    regParam: Double): (Vector, Double) = {
+  override def compute(weightsOld: Vector,
+    gradient: Vector,
+    stepSize: Double,
+    iter: Int,
+    regParam: Double): (Vector, Double) = {
     val thisIterStepSize = stepSize
     val brzWeights: BV[Double] = weightsOld.toBreeze.toDenseVector
     brzAxpy(-thisIterStepSize, gradient.toBreeze, brzWeights)
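The two NB! comments above carry the main fix: since a packed batch reaches the Gradient as a single "instance", both the per-layer gradient and the returned loss are divided by the batch size, so the averaging performed by the optimizer produces the same scale whatever batchSize is chosen. A small numeric sketch of that invariance (assumed loss values, batch size 2):

    val perInstance = Array(0.50, 0.10, 0.30, 0.70)
    val batched = Array((0.50 + 0.10) / 2, (0.30 + 0.70) / 2)
    // mean over 4 instances == mean over 2 batch averages
    assert(math.abs(perInstance.sum / 4 - batched.sum / 2) < 1e-12)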
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala b/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala
index 3c4e1414fa0d9..5376815094220 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/classification/ANNClassifier.scala
@@ -89,19 +89,58 @@ class ANNClassifier private(val labelToIndex: Map[Double, Int],
  */
 object ANNClassifier {

+  private val defaultStepSize = 1.0
+  private val defaultBatchSize = 1

-  def train(data: RDD[LabeledPoint], batchSize: Int, hiddenLayersTopology: Array[Int],
-    maxIterations: Int, convergenceTol: Double): ANNClassifierModel = {
+  /**
+   * Trains an ANN classifier.
+   *
+   * @param data RDD containing labeled points for training.
+   * @param batchSize batch size - number of instances to process in batch
+   * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes.
+   * @param maxIterations specifies maximum number of training iterations.
+   * @param convergenceTol convergence tolerance for LBFGS
+   * @return ANN model.
+   */
+  def train(data: RDD[LabeledPoint],
+    batchSize: Int,
+    hiddenLayersTopology: Array[Int],
+    maxIterations: Int,
+    convergenceTol: Double): ANNClassifierModel = {
     val initialWeights = randomWeights(data, hiddenLayersTopology)
-    val labelToIndex = data.map( lp => lp.label).distinct().collect().sorted.zipWithIndex.toMap
-    new ANNClassifier(labelToIndex, hiddenLayersTopology,
-      initialWeights, maxIterations, 1.0, convergenceTol).run(data, batchSize)
+    train(data, batchSize, hiddenLayersTopology,
+      initialWeights, maxIterations, defaultStepSize, convergenceTol)
+  }
+
+  /**
+   * Trains an already pre-trained ANN classifier.
+   * Assumes that the data has the same labels as the data
+   * that was used for training, or at least a subset of
+   * those labels.
+   *
+   * @param data RDD containing labeled points for training.
+   * @param batchSize batch size - number of instances to process in batch
+   * @param model a pre-trained ANN classifier model.
+   * @param maxIterations specifies maximum number of training iterations.
+   * @param convergenceTol convergence tolerance for LBFGS
+   * @return ANN classifier model.
+   */
+  def train(data: RDD[LabeledPoint],
+    batchSize: Int,
+    model: ANNClassifierModel,
+    maxIterations: Int,
+    convergenceTol: Double): ANNClassifierModel = {
+    val hiddenLayersTopology =
+      model.annModel.topology.slice(1, model.annModel.topology.length - 1)
+    new ANNClassifier(model.labelToIndex, hiddenLayersTopology,
+      model.annModel.weights, maxIterations, defaultStepSize, convergenceTol).run(data, batchSize)
   }

   /**
    * Trains an ANN classifier.
    *
    * @param data RDD containing labeled points for training.
+   * @param batchSize batch size - number of instances to process in batch
    * @param hiddenLayersTopology number of nodes per hidden layer, excluding the bias nodes.
    * @param initialWeights initial weights of underlying artificial neural network
    * @param maxIterations specifies maximum number of training iterations.
@@ -109,12 +148,16 @@ object ANNClassifier {
    * @param convergenceTol convergence tolerance for LBFGS
    * @return ANN model.
    */
-  def train(data: RDD[LabeledPoint], hiddenLayersTopology: Array[Int],
-    initialWeights: Vector, maxIterations: Int,
-    stepSize: Double, convergenceTol: Double): ANNClassifierModel = {
+  def train(data: RDD[LabeledPoint],
+    batchSize: Int,
+    hiddenLayersTopology: Array[Int],
+    initialWeights: Vector,
+    maxIterations: Int,
+    stepSize: Double,
+    convergenceTol: Double): ANNClassifierModel = {
     val labelToIndex = data.map( lp => lp.label).distinct().collect().sorted.zipWithIndex.toMap
     new ANNClassifier(labelToIndex, hiddenLayersTopology,
-      initialWeights, maxIterations, stepSize, convergenceTol).run(data)
+      initialWeights, maxIterations, stepSize, convergenceTol).run(data, batchSize)
   }

   /**
@@ -127,10 +170,14 @@ object ANNClassifier {
    * @param convergenceTol convergence tolerance for LBFGS
    * @return ANN classifier model.
    */
-  def train(data: RDD[LabeledPoint], hiddenLayersTopology: Array[Int], maxIterations: Int,
-    stepSize: Double, convergenceTol: Double): ANNClassifierModel = {
+  def train(data: RDD[LabeledPoint],
+    hiddenLayersTopology: Array[Int],
+    maxIterations: Int,
+    stepSize: Double,
+    convergenceTol: Double): ANNClassifierModel = {
     val initialWeights = randomWeights(data, hiddenLayersTopology)
-    train(data, hiddenLayersTopology, initialWeights, maxIterations, stepSize, convergenceTol)
+    train(data, defaultBatchSize, hiddenLayersTopology, initialWeights, maxIterations, stepSize,
+      convergenceTol)
   }

   /**
@@ -146,8 +193,11 @@ object ANNClassifier {
    * @param convergenceTol convergence tolerance for LBFGS
    * @return ANN classifier model.
    */
-  def train(data: RDD[LabeledPoint], model: ANNClassifierModel, maxIterations: Int,
-    stepSize: Double, convergenceTol: Double): ANNClassifierModel = {
+  def train(data: RDD[LabeledPoint],
+    model: ANNClassifierModel,
+    maxIterations: Int,
+    stepSize: Double,
+    convergenceTol: Double): ANNClassifierModel = {
     val hiddenLayersTopology =
       model.annModel.topology.slice(1, model.annModel.topology.length - 1)
     new ANNClassifier(model.labelToIndex, hiddenLayersTopology,
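The train overloads above derive labelToIndex by sorting the distinct labels, so a label's index is its position in the network's output layer. The conversion of a LabeledPoint into a one-hot target lives in ANNClassifierHelper, which these patches do not show; the standalone sketch below mirrors only the mapping construction:

    object LabelEncodingSketch {
      def main(args: Array[String]): Unit = {
        val labels = Array(2.0, 0.0, 1.0, 0.0, 2.0)
        val labelToIndex = labels.distinct.sorted.zipWithIndex.toMap
        // labelToIndex == Map(0.0 -> 0, 1.0 -> 1, 2.0 -> 2)
        val oneHot = Array.fill(labelToIndex.size)(0.0)
        oneHot(labelToIndex(2.0)) = 1.0
        println(oneHot.mkString(",")) // 0.0,0.0,1.0
      }
    }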
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/ANNClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/ANNClassifierSuite.scala
index 3eed26fd72531..35b81bd0b7b7e 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/ANNClassifierSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/ANNClassifierSuite.scala
@@ -37,7 +37,7 @@ class ANNClassifierSuite extends FunSuite with LocalSparkContext {
     val rddData = sc.parallelize(data, 2)
     val hiddenLayerTopology = Array[Int]{5}
     val initialWeights = ANNClassifier.randomWeights(rddData, hiddenLayerTopology, 0x01234567)
-    val model = ANNClassifier.train(rddData, hiddenLayerTopology, initialWeights, 200, 1.0, 1e-4)
+    val model = ANNClassifier.train(rddData, 1, hiddenLayerTopology, initialWeights, 200, 1.0, 1e-4)
     val predictionAndLabels = rddData.map(lp =>
       (model.predict(lp.features), lp.label)).collect()
     assert(predictionAndLabels.forall { case(p, l) =>

From d18e9b5460019970d5bcbb5a0e816aff5a05bf39 Mon Sep 17 00:00:00 2001
From: Alexander Ulanov
Date: Fri, 23 Jan 2015 15:26:55 -0800
Subject: [PATCH 142/143] Test Context fix

---
 .../spark/mllib/classification/ANNClassifierSuite.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mllib/src/test/scala/org/apache/spark/mllib/classification/ANNClassifierSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/classification/ANNClassifierSuite.scala
index 35b81bd0b7b7e..1aa8221464511 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/classification/ANNClassifierSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/classification/ANNClassifierSuite.scala
@@ -19,10 +19,10 @@ package org.apache.spark.mllib.classification

 import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.regression.LabeledPoint
-import org.apache.spark.mllib.util.LocalSparkContext
+import org.apache.spark.mllib.util.MLlibTestSparkContext
 import org.scalatest.FunSuite

-class ANNClassifierSuite extends FunSuite with LocalSparkContext {
+class ANNClassifierSuite extends FunSuite with MLlibTestSparkContext {

   test("ANN classifier test for XOR"){
     val inputs = Array[Array[Double]](
From 5de5badf32081f8bbd73166b705445b0cc37ebdd Mon Sep 17 00:00:00 2001
From: Alexander Ulanov
Date: Mon, 2 Feb 2015 15:16:01 -0800
Subject: [PATCH 143/143] Bias averaging fix

---
 .../mllib/ann/ArtificialNeuralNetwork.scala | 19 +++++++++++--------
 1 file changed, 11 insertions(+), 8 deletions(-)

diff --git a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
index a6980a9258f22..231597d8c1997 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/ann/ArtificialNeuralNetwork.scala
@@ -422,24 +422,24 @@ private[ann] trait NeuralHelper {
     (for(i <- 1 until topology.size) yield (topology(i) * topology(i - 1))).sum +
     topology.sum - topology(0)

-  protected def unrollWeights(weights: linalg.Vector): (Array[BDM[Double]], Array[BDM[Double]]) = {
+  protected def unrollWeights(weights: linalg.Vector): (Array[BDM[Double]], Array[BDV[Double]]) = {
     require(weights.size == weightCount)
     val weightsCopy = weights.toArray
     val weightMatrices = new Array[BDM[Double]](topology.size)
-    val bias = new Array[BDM[Double]](topology.size)
+    val bias = new Array[BDV[Double]](topology.size)
     var offset = 0
     for(i <- 1 until topology.size){
       weightMatrices(i) = new BDM[Double](topology(i), topology(i - 1), weightsCopy, offset)
       offset += topology(i) * topology(i - 1)
       /* TODO: BDM */
-      bias(i) = (new BDV[Double](weightsCopy, offset, 1, topology(i))).toDenseMatrix.t
+      bias(i) = new BDV[Double](weightsCopy, offset, 1, topology(i))
       offset += topology(i)
     }
     (weightMatrices, bias)
   }

   protected def rollWeights(weightMatricesUpdate: Array[BDM[Double]],
-    biasUpdate: Array[BDM[Double]],
+    biasUpdate: Array[BDV[Double]],
     cumGradient: Vector): Unit = {
     val wu = cumGradient.toArray
     var offset = 0
@@ -461,12 +461,12 @@ private[ann] trait NeuralHelper {
   }

   protected def forwardRun(data: BDM[Double], weightMatrices: Array[BDM[Double]],
-    bias: Array[BDM[Double]]): Array[BDM[Double]] = {
+    bias: Array[BDV[Double]]): Array[BDM[Double]] = {
     val outArray = new Array[BDM[Double]](topology.size)
     outArray(0) = data
     for(i <- 1 until topology.size) {
       outArray(i) = weightMatrices(i) * outArray(i - 1)// :+ bias(i))
-      outArray(i)(::, *) :+= bias(i).toDenseVector
+      outArray(i)(::, *) :+= bias(i)
       Bsigmoid.inPlace(outArray(i))
     }
     outArray
@@ -475,9 +475,10 @@ private[ann] trait NeuralHelper {
   protected def wGradient(weightMatrices: Array[BDM[Double]],
     targetOutput: BDM[Double],
     outputs: Array[BDM[Double]]):
-    (Array[BDM[Double]], Array[BDM[Double]]) = {
+    (Array[BDM[Double]], Array[BDV[Double]]) = {
     /* error back propagation */
     val deltas = new Array[BDM[Double]](topology.size)
+    val avgDeltas = new Array[BDV[Double]](topology.size)
     for(i <- (topology.size - 1) until (0, -1)){
       /* TODO: GEMM? */
       val outPrime = BDM.ones[Double](outputs(i).rows, outputs(i).cols)
@@ -488,6 +489,8 @@ private[ann] trait NeuralHelper {
       }else{
         deltas(i) = (weightMatrices(i + 1).t * deltas(i + 1)) :* outPrime
       }
+      avgDeltas(i) = Bsum(deltas(i)(*, ::))
+      avgDeltas(i) :/= outputs(i).cols.toDouble
     }
     /* gradient */
     val gradientMatrices = new Array[BDM[Double]](topology.size)
@@ -498,7 +501,7 @@ private[ann] trait NeuralHelper {
      * the batch to be transparent for the optimizer */
       gradientMatrices(i) :/= outputs(i).cols.toDouble
     }
-    (gradientMatrices, deltas)
+    (gradientMatrices, avgDeltas)
   }
 }
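The final patch averages the per-instance deltas across the batch columns before they enter the bias part of the gradient, mirroring the division already applied to the weight matrices; judging from the code it replaces, the previous roll step read only the first topology(i) entries of each delta matrix, i.e. one batch column. A minimal breeze sketch of the row-wise averaging (assumes breeze on the classpath; values are illustrative):

    import breeze.linalg.{DenseMatrix, DenseVector, sum, *}

    object BiasAveragingSketch {
      def main(args: Array[String]): Unit = {
        // deltas for 2 output nodes over a batch of 3 instances (one column per instance)
        val deltas = new DenseMatrix(2, 3, Array(1.0, 4.0, 2.0, 5.0, 3.0, 6.0))
        val avg: DenseVector[Double] = sum(deltas(*, ::)) :/ 3.0
        println(avg) // DenseVector(2.0, 5.0)
      }
    }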