Evaluator.scala
@@ -30,7 +30,8 @@ import org.apache.spark.sql.Dataset
 abstract class Evaluator extends Params {
 
   /**
-   * Evaluates model output and returns a scalar metric (larger is better).
+   * Evaluates model output and returns a scalar metric.
+   * The value of [[isLargerBetter]] specifies whether larger values are better.
    *
    * @param dataset a dataset that contains labels/observations and predictions.
    * @param paramMap parameter map that specifies the input columns and output metrics
@@ -42,7 +43,9 @@ abstract class Evaluator extends Params {
   }
 
   /**
-   * Evaluates the output.
+   * Evaluates model output and returns a scalar metric.
+   * The value of [[isLargerBetter]] specifies whether larger values are better.
+   *
    * @param dataset a dataset that contains labels/observations and predictions.
    * @return metric
    */
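As a side note, a minimal sketch of how a caller might combine evaluate with isLargerBetter when comparing candidate models; the choice of RegressionEvaluator and the candidates/validation values are assumptions for illustration, not part of this diff:

import org.apache.spark.ml.evaluation.RegressionEvaluator
import org.apache.spark.ml.regression.LinearRegressionModel
import org.apache.spark.sql.DataFrame

// Pick the best model, respecting the metric's direction via isLargerBetter.
def selectBest(candidates: Seq[LinearRegressionModel],
               validation: DataFrame): LinearRegressionModel = {
  val evaluator = new RegressionEvaluator().setMetricName("rmse")
  candidates.maxBy { model =>
    val metric = evaluator.evaluate(model.transform(validation))
    // Negate when smaller is better so maxBy still selects the best model.
    if (evaluator.isLargerBetter) metric else -metric
  }
}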
MinMaxScaler.scala
@@ -79,9 +79,9 @@ private[feature] trait MinMaxScalerParams extends Params with HasInputCol with H
  * statistics, which is also known as min-max normalization or Rescaling. The rescaled value for
  * feature E is calculated as,
  *
- * Rescaled(e_i) = \frac{e_i - E_{min}}{E_{max} - E_{min}} * (max - min) + min
+ * `Rescaled(e_i) = \frac{e_i - E_{min}}{E_{max} - E_{min}} * (max - min) + min`
  *
- * For the case E_{max} == E_{min}, Rescaled(e_i) = 0.5 * (max + min)
+ * For the case `E_{max} == E_{min}`, `Rescaled(e_i) = 0.5 * (max + min)`.
  * Note that since zero values will probably be transformed to non-zero values, output of the
  * transformer will be DenseVector even for sparse input.
  */
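For a concrete sense of the formula, a standalone sketch in plain Scala (not the transformer's actual implementation), using MinMaxScaler's default target range of min = 0.0 and max = 1.0:

// Rescale a feature value e from the observed range [eMin, eMax] into [min, max].
def rescale(e: Double, eMin: Double, eMax: Double,
            min: Double = 0.0, max: Double = 1.0): Double =
  if (eMax == eMin) 0.5 * (max + min) // degenerate case: constant feature
  else (e - eMin) / (eMax - eMin) * (max - min) + min

rescale(5.0, 0.0, 10.0) // 0.5
rescale(3.0, 3.0, 3.0)  // 0.5, the E_{max} == E_{min} case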
SQLDataTypes.scala
@@ -17,15 +17,16 @@
 
 package org.apache.spark.ml.linalg
 
-import org.apache.spark.annotation.DeveloperApi
+import org.apache.spark.annotation.{DeveloperApi, Since}
 import org.apache.spark.sql.types.DataType
 
 /**
  * :: DeveloperApi ::
  * SQL data types for vectors and matrices.
  */
+@Since("2.0.0")
 @DeveloperApi
-object sqlDataTypes {
+object SQLDataTypes {
 
   /** Data type for [[Vector]]. */
   val VectorType: DataType = new VectorUDT
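The renamed object comes up when declaring vector-valued DataFrame columns by hand; a brief sketch (the column name is illustrative):

import org.apache.spark.ml.linalg.SQLDataTypes
import org.apache.spark.sql.types.{StructField, StructType}

// A schema with one vector-valued column, typed via the public alias.
val schema = StructType(Seq(
  StructField("features", SQLDataTypes.VectorType, nullable = false)
))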
GeneralizedLinearRegression.scala
@@ -376,7 +376,7 @@ object GeneralizedLinearRegression extends DefaultParamsReadable[GeneralizedLine
     def deviance(y: Double, mu: Double, weight: Double): Double
 
     /**
-     * Akaike's 'An Information Criterion'(AIC) value of the family for a given dataset.
+     * Akaike Information Criterion (AIC) value of the family for a given dataset.
      *
      * @param predictions an RDD of (y, mu, weight) of instances in evaluation dataset
      * @param deviance the deviance for the fitted model in evaluation dataset
@@ -702,13 +702,13 @@ class GeneralizedLinearRegressionModel private[ml] (
 
   import GeneralizedLinearRegression._
 
-  lazy val familyObj = Family.fromName($(family))
-  lazy val linkObj = if (isDefined(link)) {
+  private lazy val familyObj = Family.fromName($(family))
+  private lazy val linkObj = if (isDefined(link)) {
     Link.fromName($(link))
   } else {
     familyObj.defaultLink
   }
-  lazy val familyAndLink = new FamilyAndLink(familyObj, linkObj)
+  private lazy val familyAndLink = new FamilyAndLink(familyObj, linkObj)
 
   override protected def predict(features: Vector): Double = {
     val eta = predictLink(features)
@@ -1021,7 +1021,7 @@ class GeneralizedLinearRegressionSummary private[regression] (
     rss / degreesOfFreedom
   }
 
-  /** Akaike's "An Information Criterion"(AIC) for the fitted model. */
+  /** Akaike Information Criterion (AIC) for the fitted model. */
   @Since("2.0.0")
   lazy val aic: Double = {
     val w = weightCol
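The family and link objects made private above are resolved from the string-valued params, which remain the public way to configure them; a short sketch (the family/link choice is illustrative, and training data is assumed):

import org.apache.spark.ml.regression.GeneralizedLinearRegression

// The model resolves these strings internally via Family.fromName / Link.fromName.
val glr = new GeneralizedLinearRegression()
  .setFamily("poisson")
  .setLink("log")
// val model = glr.fit(training) // training: DataFrame, assumed defined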
JavaSQLDataTypesSuite.java
@@ -20,7 +20,7 @@
 import org.junit.Assert;
 import org.junit.Test;
 
-import static org.apache.spark.ml.linalg.sqlDataTypes.*;
+import static org.apache.spark.ml.linalg.SQLDataTypes.*;
 
 public class JavaSQLDataTypesSuite {
   @Test
SQLDataTypesSuite.scala
@@ -21,7 +21,7 @@ import org.apache.spark.SparkFunSuite
 
 class SQLDataTypesSuite extends SparkFunSuite {
   test("sqlDataTypes") {
-    assert(sqlDataTypes.VectorType === new VectorUDT)
-    assert(sqlDataTypes.MatrixType === new MatrixUDT)
+    assert(SQLDataTypes.VectorType === new VectorUDT)
+    assert(SQLDataTypes.MatrixType === new MatrixUDT)
   }
 }