/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.ml.regression

import scala.util.Random

import org.apache.spark.SparkFunSuite
import org.apache.spark.ml.feature.Instance
import org.apache.spark.ml.param.ParamsSuite
import org.apache.spark.ml.util.{DefaultReadWriteTest, MLTestingUtils}
import org.apache.spark.mllib.linalg.{DenseVector, Vector, Vectors}
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.util.{LinearDataGenerator, MLlibTestSparkContext}
import org.apache.spark.mllib.util.TestingUtils._
import org.apache.spark.sql.{DataFrame, Row}

class LinearRegressionSuite
  extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {

  private val seed: Int = 42
  @transient var datasetWithDenseFeature: DataFrame = _
  @transient var datasetWithDenseFeatureWithoutIntercept: DataFrame = _
  @transient var datasetWithSparseFeature: DataFrame = _
  @transient var datasetWithWeight: DataFrame = _
  @transient var datasetWithWeightConstantLabel: DataFrame = _
  @transient var datasetWithWeightZeroLabel: DataFrame = _

  override def beforeAll(): Unit = {
    super.beforeAll()
    datasetWithDenseFeature = sqlContext.createDataFrame(
      sc.parallelize(LinearDataGenerator.generateLinearInput(
        intercept = 6.3, weights = Array(4.7, 7.2), xMean = Array(0.9, -1.3),
        xVariance = Array(0.7, 1.2), nPoints = 10000, seed, eps = 0.1), 2))
    /*
       datasetWithDenseFeatureWithoutIntercept is not needed for correctness testing
       but is useful for illustrating training model without intercept
     */
    datasetWithDenseFeatureWithoutIntercept = sqlContext.createDataFrame(
      sc.parallelize(LinearDataGenerator.generateLinearInput(
        intercept = 0.0, weights = Array(4.7, 7.2), xMean = Array(0.9, -1.3),
        xVariance = Array(0.7, 1.2), nPoints = 10000, seed, eps = 0.1), 2))

    val r = new Random(seed)
    // When feature size is larger than 4096, normal optimizer is choosed
    // as the solver of linear regression in the case of "auto" mode.
    val featureSize = 4100
    datasetWithSparseFeature = sqlContext.createDataFrame(
      sc.parallelize(LinearDataGenerator.generateLinearInput(
        intercept = 0.0, weights = Seq.fill(featureSize)(r.nextDouble).toArray,
        xMean = Seq.fill(featureSize)(r.nextDouble).toArray,
        xVariance = Seq.fill(featureSize)(r.nextDouble).toArray, nPoints = 200,
        seed, eps = 0.1, sparsity = 0.7), 2))

    /*
       R code:

       A <- matrix(c(0, 1, 2, 3, 5, 7, 11, 13), 4, 2)
       b <- c(17, 19, 23, 29)
       w <- c(1, 2, 3, 4)
       df <- as.data.frame(cbind(A, b))
     */
    datasetWithWeight = sqlContext.createDataFrame(
      sc.parallelize(Seq(
        Instance(17.0, 1.0, Vectors.dense(0.0, 5.0).toSparse),
        Instance(19.0, 2.0, Vectors.dense(1.0, 7.0)),
        Instance(23.0, 3.0, Vectors.dense(2.0, 11.0)),
        Instance(29.0, 4.0, Vectors.dense(3.0, 13.0))
      ), 2))

    /*
       R code:

       A <- matrix(c(0, 1, 2, 3, 5, 7, 11, 13), 4, 2)
       b.const <- c(17, 17, 17, 17)
       w <- c(1, 2, 3, 4)
       df.const.label <- as.data.frame(cbind(A, b.const))
     */
    datasetWithWeightConstantLabel = sqlContext.createDataFrame(
      sc.parallelize(Seq(
        Instance(17.0, 1.0, Vectors.dense(0.0, 5.0).toSparse),
        Instance(17.0, 2.0, Vectors.dense(1.0, 7.0)),
        Instance(17.0, 3.0, Vectors.dense(2.0, 11.0)),
        Instance(17.0, 4.0, Vectors.dense(3.0, 13.0))
      ), 2))
    datasetWithWeightZeroLabel = sqlContext.createDataFrame(
      sc.parallelize(Seq(
        Instance(0.0, 1.0, Vectors.dense(0.0, 5.0).toSparse),
        Instance(0.0, 2.0, Vectors.dense(1.0, 7.0)),
        Instance(0.0, 3.0, Vectors.dense(2.0, 11.0)),
        Instance(0.0, 4.0, Vectors.dense(3.0, 13.0))
      ), 2))
  }

  /**
   * Enable the ignored test to export the dataset into CSV format,
   * so we can validate the training accuracy compared with R's glmnet package.
   */
  ignore("export test data into CSV format") {
    datasetWithDenseFeature.rdd.map { case Row(label: Double, features: Vector) =>
      label + "," + features.toArray.mkString(",")
    }.repartition(1).saveAsTextFile("target/tmp/LinearRegressionSuite/datasetWithDenseFeature")

    datasetWithDenseFeatureWithoutIntercept.rdd.map {
      case Row(label: Double, features: Vector) =>
        label + "," + features.toArray.mkString(",")
    }.repartition(1).saveAsTextFile(
      "target/tmp/LinearRegressionSuite/datasetWithDenseFeatureWithoutIntercept")

    datasetWithSparseFeature.rdd.map { case Row(label: Double, features: Vector) =>
      label + "," + features.toArray.mkString(",")
    }.repartition(1).saveAsTextFile("target/tmp/LinearRegressionSuite/datasetWithSparseFeature")
  }

  test("params") {
    ParamsSuite.checkParams(new LinearRegression)
    val model = new LinearRegressionModel("linearReg", Vectors.dense(0.0), 0.0)
    ParamsSuite.checkParams(model)
  }

  test("linear regression: default params") {
    val lir = new LinearRegression
    assert(lir.getLabelCol === "label")
    assert(lir.getFeaturesCol === "features")
    assert(lir.getPredictionCol === "prediction")
    assert(lir.getRegParam === 0.0)
    assert(lir.getElasticNetParam === 0.0)
    assert(lir.getFitIntercept)
    assert(lir.getStandardization)
    assert(lir.getSolver == "auto")
    val model = lir.fit(datasetWithDenseFeature)

    // copied model must have the same parent.
    MLTestingUtils.checkCopy(model)

    model.transform(datasetWithDenseFeature)
      .select("label", "prediction")
      .collect()
    assert(model.getFeaturesCol === "features")
    assert(model.getPredictionCol === "prediction")
    assert(model.intercept !== 0.0)
    assert(model.hasParent)
    val numFeatures = datasetWithDenseFeature.select("features").first().getAs[Vector](0).size
    assert(model.numFeatures === numFeatures)
  }

  test("linear regression with intercept without regularization") {
    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      val trainer1 = new LinearRegression().setSolver(solver)
      // The result should be the same regardless of standardization without regularization
      val trainer2 = (new LinearRegression).setStandardization(false).setSolver(solver)
      val model1 = trainer1.fit(datasetWithDenseFeature)
      val model2 = trainer2.fit(datasetWithDenseFeature)

      /*
         Using the following R code to load the data and train the model using glmnet package.

         library("glmnet")
         data <- read.csv("path", header=FALSE, stringsAsFactors=FALSE)
         features <- as.matrix(data.frame(as.numeric(data$V2), as.numeric(data$V3)))
         label <- as.numeric(data$V1)
         coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0))
         > coefficients
          3 x 1 sparse Matrix of class "dgCMatrix"
                                   s0
         (Intercept)         6.298698
         as.numeric.data.V2. 4.700706
         as.numeric.data.V3. 7.199082
       */
      val interceptR = 6.298698
      val coefficientsR = Vectors.dense(4.700706, 7.199082)

      assert(model1.intercept ~== interceptR relTol 1E-3)
      assert(model1.coefficients ~= coefficientsR relTol 1E-3)
      assert(model2.intercept ~== interceptR relTol 1E-3)
      assert(model2.coefficients ~= coefficientsR relTol 1E-3)

      model1.transform(datasetWithDenseFeature).select("features", "prediction").collect().foreach {
        case Row(features: DenseVector, prediction1: Double) =>
          val prediction2 =
            features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
              model1.intercept
          assert(prediction1 ~== prediction2 relTol 1E-5)
      }
    }
  }

  test("linear regression without intercept without regularization") {
    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      val trainer1 = (new LinearRegression).setFitIntercept(false).setSolver(solver)
      // Without regularization the results should be the same
      val trainer2 = (new LinearRegression).setFitIntercept(false).setStandardization(false)
        .setSolver(solver)
      val model1 = trainer1.fit(datasetWithDenseFeature)
      val modelWithoutIntercept1 = trainer1.fit(datasetWithDenseFeatureWithoutIntercept)
      val model2 = trainer2.fit(datasetWithDenseFeature)
      val modelWithoutIntercept2 = trainer2.fit(datasetWithDenseFeatureWithoutIntercept)

      /*
         coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0, lambda = 0,
           intercept = FALSE))
         > coefficients
          3 x 1 sparse Matrix of class "dgCMatrix"
                                   s0
         (Intercept)         .
         as.numeric.data.V2. 6.973403
         as.numeric.data.V3. 5.284370
       */
      val coefficientsR = Vectors.dense(6.973403, 5.284370)

      assert(model1.intercept ~== 0 absTol 1E-2)
      assert(model1.coefficients ~= coefficientsR relTol 1E-2)
      assert(model2.intercept ~== 0 absTol 1E-2)
      assert(model2.coefficients ~= coefficientsR relTol 1E-2)

      /*
         Then again with the data with no intercept:
         > coefficientsWithoutIntercept
          3 x 1 sparse Matrix of class "dgCMatrix"
                                   s0
         (Intercept)           .
         as.numeric.data3.V2. 4.70011
         as.numeric.data3.V3. 7.19943
       */
      val coefficientsWithourInterceptR = Vectors.dense(4.70011, 7.19943)

      assert(modelWithoutIntercept1.intercept ~== 0 absTol 1E-3)
      assert(modelWithoutIntercept1.coefficients ~= coefficientsWithourInterceptR relTol 1E-3)
      assert(modelWithoutIntercept2.intercept ~== 0 absTol 1E-3)
      assert(modelWithoutIntercept2.coefficients ~= coefficientsWithourInterceptR relTol 1E-3)
    }
  }

  test("linear regression with intercept with L1 regularization") {
    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      val trainer1 = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57)
        .setSolver(solver)
      val trainer2 = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57)
        .setSolver(solver).setStandardization(false)

      // Normal optimizer is not supported with only L1 regularization case.
      if (solver == "normal") {
        intercept[IllegalArgumentException] {
            trainer1.fit(datasetWithDenseFeature)
            trainer2.fit(datasetWithDenseFeature)
          }
      } else {
        val model1 = trainer1.fit(datasetWithDenseFeature)
        val model2 = trainer2.fit(datasetWithDenseFeature)

        /*
           coefficients <- coef(glmnet(features, label, family="gaussian",
             alpha = 1.0, lambda = 0.57 ))
           > coefficients
            3 x 1 sparse Matrix of class "dgCMatrix"
                                    s0
           (Intercept)       6.242284
           as.numeric.d1.V2. 4.019605
           as.numeric.d1.V3. 6.679538
         */
        val interceptR1 = 6.242284
        val coefficientsR1 = Vectors.dense(4.019605, 6.679538)
        assert(model1.intercept ~== interceptR1 relTol 1E-2)
        assert(model1.coefficients ~= coefficientsR1 relTol 1E-2)

        /*
           coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 1.0,
             lambda = 0.57, standardize=FALSE ))
           > coefficients
            3 x 1 sparse Matrix of class "dgCMatrix"
                                    s0
           (Intercept)         6.416948
           as.numeric.data.V2. 3.893869
           as.numeric.data.V3. 6.724286
         */
        val interceptR2 = 6.416948
        val coefficientsR2 = Vectors.dense(3.893869, 6.724286)

        assert(model2.intercept ~== interceptR2 relTol 1E-3)
        assert(model2.coefficients ~= coefficientsR2 relTol 1E-3)

        model1.transform(datasetWithDenseFeature).select("features", "prediction")
          .collect().foreach {
            case Row(features: DenseVector, prediction1: Double) =>
              val prediction2 =
                features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
                  model1.intercept
              assert(prediction1 ~== prediction2 relTol 1E-5)
        }
      }
    }
  }

  test("linear regression without intercept with L1 regularization") {
    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      val trainer1 = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57)
        .setFitIntercept(false).setSolver(solver)
      val trainer2 = (new LinearRegression).setElasticNetParam(1.0).setRegParam(0.57)
        .setFitIntercept(false).setStandardization(false).setSolver(solver)

      // Normal optimizer is not supported with only L1 regularization case.
      if (solver == "normal") {
        intercept[IllegalArgumentException] {
            trainer1.fit(datasetWithDenseFeature)
            trainer2.fit(datasetWithDenseFeature)
          }
      } else {
        val model1 = trainer1.fit(datasetWithDenseFeature)
        val model2 = trainer2.fit(datasetWithDenseFeature)

        /*
           coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 1.0,
             lambda = 0.57, intercept=FALSE ))
           > coefficients
            3 x 1 sparse Matrix of class "dgCMatrix"
                                     s0
           (Intercept)          .
           as.numeric.data.V2. 6.272927
           as.numeric.data.V3. 4.782604
         */
        val interceptR1 = 0.0
        val coefficientsR1 = Vectors.dense(6.272927, 4.782604)

        assert(model1.intercept ~== interceptR1 absTol 1E-2)
        assert(model1.coefficients ~= coefficientsR1 relTol 1E-2)

        /*
           coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 1.0,
             lambda = 0.57, intercept=FALSE, standardize=FALSE ))
           > coefficients
            3 x 1 sparse Matrix of class "dgCMatrix"
                                     s0
           (Intercept)         .
           as.numeric.data.V2. 6.207817
           as.numeric.data.V3. 4.775780
         */
        val interceptR2 = 0.0
        val coefficientsR2 = Vectors.dense(6.207817, 4.775780)

        assert(model2.intercept ~== interceptR2 absTol 1E-2)
        assert(model2.coefficients ~= coefficientsR2 relTol 1E-2)

        model1.transform(datasetWithDenseFeature).select("features", "prediction")
          .collect().foreach {
            case Row(features: DenseVector, prediction1: Double) =>
              val prediction2 =
                features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
                  model1.intercept
              assert(prediction1 ~== prediction2 relTol 1E-5)
        }
      }
    }
  }

  test("linear regression with intercept with L2 regularization") {
    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      val trainer1 = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3)
        .setSolver(solver)
      val trainer2 = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3)
        .setStandardization(false).setSolver(solver)
      val model1 = trainer1.fit(datasetWithDenseFeature)
      val model2 = trainer2.fit(datasetWithDenseFeature)

      /*
         coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3))
         > coefficients
          3 x 1 sparse Matrix of class "dgCMatrix"
                                   s0
         (Intercept)       5.260103
         as.numeric.d1.V2. 3.725522
         as.numeric.d1.V3. 5.711203
       */
      val interceptR1 = 5.260103
      val coefficientsR1 = Vectors.dense(3.725522, 5.711203)

      assert(model1.intercept ~== interceptR1 relTol 1E-2)
      assert(model1.coefficients ~= coefficientsR1 relTol 1E-2)

      /*
         coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
           standardize=FALSE))
         > coefficients
          3 x 1 sparse Matrix of class "dgCMatrix"
                                   s0
         (Intercept)       5.790885
         as.numeric.d1.V2. 3.432373
         as.numeric.d1.V3. 5.919196
       */
      val interceptR2 = 5.790885
      val coefficientsR2 = Vectors.dense(3.432373, 5.919196)

      assert(model2.intercept ~== interceptR2 relTol 1E-2)
      assert(model2.coefficients ~= coefficientsR2 relTol 1E-2)

      model1.transform(datasetWithDenseFeature).select("features", "prediction").collect().foreach {
        case Row(features: DenseVector, prediction1: Double) =>
          val prediction2 =
            features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
              model1.intercept
          assert(prediction1 ~== prediction2 relTol 1E-5)
      }
    }
  }

  test("linear regression without intercept with L2 regularization") {
    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      val trainer1 = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3)
        .setFitIntercept(false).setSolver(solver)
      val trainer2 = (new LinearRegression).setElasticNetParam(0.0).setRegParam(2.3)
        .setFitIntercept(false).setStandardization(false).setSolver(solver)
      val model1 = trainer1.fit(datasetWithDenseFeature)
      val model2 = trainer2.fit(datasetWithDenseFeature)

      /*
         coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
           intercept = FALSE))
         > coefficients
          3 x 1 sparse Matrix of class "dgCMatrix"
                                   s0
         (Intercept)       .
         as.numeric.d1.V2. 5.493430
         as.numeric.d1.V3. 4.223082
       */
      val interceptR1 = 0.0
      val coefficientsR1 = Vectors.dense(5.493430, 4.223082)

      assert(model1.intercept ~== interceptR1 absTol 1E-2)
      assert(model1.coefficients ~= coefficientsR1 relTol 1E-2)

      /*
         coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.0, lambda = 2.3,
           intercept = FALSE, standardize=FALSE))
         > coefficients
          3 x 1 sparse Matrix of class "dgCMatrix"
                                   s0
         (Intercept)         .
         as.numeric.d1.V2. 5.244324
         as.numeric.d1.V3. 4.203106
       */
      val interceptR2 = 0.0
      val coefficientsR2 = Vectors.dense(5.244324, 4.203106)

      assert(model2.intercept ~== interceptR2 absTol 1E-2)
      assert(model2.coefficients ~= coefficientsR2 relTol 1E-2)

      model1.transform(datasetWithDenseFeature).select("features", "prediction").collect().foreach {
        case Row(features: DenseVector, prediction1: Double) =>
          val prediction2 =
            features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
              model1.intercept
          assert(prediction1 ~== prediction2 relTol 1E-5)
      }
    }
  }

  test("linear regression with intercept with ElasticNet regularization") {
    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      val trainer1 = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6)
        .setSolver(solver)
      val trainer2 = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6)
        .setStandardization(false).setSolver(solver)

      // Normal optimizer is not supported with non-zero elasticnet parameter.
      if (solver == "normal") {
        intercept[IllegalArgumentException] {
            trainer1.fit(datasetWithDenseFeature)
            trainer2.fit(datasetWithDenseFeature)
          }
      } else {
        val model1 = trainer1.fit(datasetWithDenseFeature)
        val model2 = trainer2.fit(datasetWithDenseFeature)

        /*
           coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3,
             lambda = 1.6 ))
           > coefficients
            3 x 1 sparse Matrix of class "dgCMatrix"
                                     s0
           (Intercept)       5.689855
           as.numeric.d1.V2. 3.661181
           as.numeric.d1.V3. 6.000274
         */
        val interceptR1 = 5.689855
        val coefficientsR1 = Vectors.dense(3.661181, 6.000274)

        assert(model1.intercept ~== interceptR1 relTol 1E-2)
        assert(model1.coefficients ~= coefficientsR1 relTol 1E-2)

        /*
           coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3, lambda = 1.6
             standardize=FALSE))
           > coefficients
            3 x 1 sparse Matrix of class "dgCMatrix"
                                     s0
           (Intercept)       6.113890
           as.numeric.d1.V2. 3.407021
           as.numeric.d1.V3. 6.152512
         */
        val interceptR2 = 6.113890
        val coefficientsR2 = Vectors.dense(3.407021, 6.152512)

        assert(model2.intercept ~== interceptR2 relTol 1E-2)
        assert(model2.coefficients ~= coefficientsR2 relTol 1E-2)

        model1.transform(datasetWithDenseFeature).select("features", "prediction")
          .collect().foreach {
          case Row(features: DenseVector, prediction1: Double) =>
            val prediction2 =
              features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
                model1.intercept
            assert(prediction1 ~== prediction2 relTol 1E-5)
        }
      }
    }
  }

  test("linear regression without intercept with ElasticNet regularization") {
    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      val trainer1 = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6)
        .setFitIntercept(false).setSolver(solver)
      val trainer2 = (new LinearRegression).setElasticNetParam(0.3).setRegParam(1.6)
        .setFitIntercept(false).setStandardization(false).setSolver(solver)

      // Normal optimizer is not supported with non-zero elasticnet parameter.
      if (solver == "normal") {
        intercept[IllegalArgumentException] {
            trainer1.fit(datasetWithDenseFeature)
            trainer2.fit(datasetWithDenseFeature)
          }
      } else {
        val model1 = trainer1.fit(datasetWithDenseFeature)
        val model2 = trainer2.fit(datasetWithDenseFeature)

        /*
           coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3,
             lambda = 1.6, intercept=FALSE ))
           > coefficients
            3 x 1 sparse Matrix of class "dgCMatrix"
                                      s0
           (Intercept)       .
           as.numeric.d1.V2. 5.643748
           as.numeric.d1.V3. 4.331519
         */
        val interceptR1 = 0.0
        val coefficientsR1 = Vectors.dense(5.643748, 4.331519)

        assert(model1.intercept ~== interceptR1 absTol 1E-2)
        assert(model1.coefficients ~= coefficientsR1 relTol 1E-2)

        /*
           coefficients <- coef(glmnet(features, label, family="gaussian", alpha = 0.3,
             lambda = 1.6, intercept=FALSE, standardize=FALSE ))
           > coefficients
            3 x 1 sparse Matrix of class "dgCMatrix"
                                     s0
           (Intercept)         .
           as.numeric.d1.V2. 5.455902
           as.numeric.d1.V3. 4.312266

         */
        val interceptR2 = 0.0
        val coefficientsR2 = Vectors.dense(5.455902, 4.312266)

        assert(model2.intercept ~== interceptR2 absTol 1E-2)
        assert(model2.coefficients ~= coefficientsR2 relTol 1E-2)

        model1.transform(datasetWithDenseFeature).select("features", "prediction")
          .collect().foreach {
          case Row(features: DenseVector, prediction1: Double) =>
            val prediction2 =
              features(0) * model1.coefficients(0) + features(1) * model1.coefficients(1) +
                model1.intercept
            assert(prediction1 ~== prediction2 relTol 1E-5)
        }
      }
    }
  }

  test("linear regression model with constant label") {
    /*
       R code:
       for (formula in c(b.const ~ . -1, b.const ~ .)) {
         model <- lm(formula, data=df.const.label, weights=w)
         print(as.vector(coef(model)))
       }
      [1] -9.221298  3.394343
      [1] 17  0  0
    */
    val expected = Seq(
      Vectors.dense(0.0, -9.221298, 3.394343),
      Vectors.dense(17.0, 0.0, 0.0))

    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      var idx = 0
      for (fitIntercept <- Seq(false, true)) {
        val model1 = new LinearRegression()
          .setFitIntercept(fitIntercept)
          .setWeightCol("weight")
          .setSolver(solver)
          .fit(datasetWithWeightConstantLabel)
        val actual1 = Vectors.dense(model1.intercept, model1.coefficients(0),
            model1.coefficients(1))
        assert(actual1 ~== expected(idx) absTol 1e-4)

        val model2 = new LinearRegression()
          .setFitIntercept(fitIntercept)
          .setWeightCol("weight")
          .setSolver(solver)
          .fit(datasetWithWeightZeroLabel)
        val actual2 = Vectors.dense(model2.intercept, model2.coefficients(0),
            model2.coefficients(1))
        assert(actual2 ~==  Vectors.dense(0.0, 0.0, 0.0) absTol 1e-4)
        idx += 1
      }
    }
  }

  test("regularized linear regression through origin with constant label") {
    // The problem is ill-defined if fitIntercept=false, regParam is non-zero.
    // An exception is thrown in this case.
    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      for (standardization <- Seq(false, true)) {
        val model = new LinearRegression().setFitIntercept(false)
          .setRegParam(0.1).setStandardization(standardization).setSolver(solver)
        intercept[IllegalArgumentException] {
          model.fit(datasetWithWeightConstantLabel)
        }
      }
    }
  }

  test("linear regression with l-bfgs when training is not needed") {
    // When label is constant, l-bfgs solver returns results without training.
    // There are two possibilities: If the label is non-zero but constant,
    // and fitIntercept is true, then the model return yMean as intercept without training.
    // If label is all zeros, then all coefficients are zero regardless of fitIntercept, so
    // no training is needed.
    for (fitIntercept <- Seq(false, true)) {
      for (standardization <- Seq(false, true)) {
        val model1 = new LinearRegression()
          .setFitIntercept(fitIntercept)
          .setStandardization(standardization)
          .setWeightCol("weight")
          .setSolver("l-bfgs")
          .fit(datasetWithWeightConstantLabel)
        if (fitIntercept) {
          assert(model1.summary.objectiveHistory(0) ~== 0.0 absTol 1e-4)
        }
        val model2 = new LinearRegression()
          .setFitIntercept(fitIntercept)
          .setWeightCol("weight")
          .setSolver("l-bfgs")
          .fit(datasetWithWeightZeroLabel)
        assert(model2.summary.objectiveHistory(0) ~== 0.0 absTol 1e-4)
      }
    }
  }

  test("linear regression model training summary") {
    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      val trainer = new LinearRegression().setSolver(solver)
      val model = trainer.fit(datasetWithDenseFeature)
      val trainerNoPredictionCol = trainer.setPredictionCol("")
      val modelNoPredictionCol = trainerNoPredictionCol.fit(datasetWithDenseFeature)

      // Training results for the model should be available
      assert(model.hasSummary)
      assert(modelNoPredictionCol.hasSummary)

      // Schema should be a superset of the input dataset
      assert((datasetWithDenseFeature.schema.fieldNames.toSet + "prediction").subsetOf(
        model.summary.predictions.schema.fieldNames.toSet))
      // Validate that we re-insert a prediction column for evaluation
      val modelNoPredictionColFieldNames
      = modelNoPredictionCol.summary.predictions.schema.fieldNames
      assert((datasetWithDenseFeature.schema.fieldNames.toSet).subsetOf(
        modelNoPredictionColFieldNames.toSet))
      assert(modelNoPredictionColFieldNames.exists(s => s.startsWith("prediction_")))

      // Residuals in [[LinearRegressionResults]] should equal those manually computed
      val expectedResiduals = datasetWithDenseFeature.select("features", "label")
        .rdd
        .map { case Row(features: DenseVector, label: Double) =>
          val prediction =
            features(0) * model.coefficients(0) + features(1) * model.coefficients(1) +
              model.intercept
          label - prediction
        }
        .zip(model.summary.residuals.rdd.map(_.getDouble(0)))
        .collect()
        .foreach { case (manualResidual: Double, resultResidual: Double) =>
          assert(manualResidual ~== resultResidual relTol 1E-5)
        }

      /*
         # Use the following R code to generate model training results.

         # path/part-00000 is the file generated by running LinearDataGenerator.generateLinearInput
         # as described before the beforeAll() method.
         d1 <- read.csv("path/part-00000", header=FALSE, stringsAsFactors=FALSE)
         fit <- glm(V1 ~ V2 + V3, data = d1, family = "gaussian")
         names(f1)[1] = c("V2")
         names(f1)[2] = c("V3")
         f1 <- data.frame(as.numeric(d1$V2), as.numeric(d1$V3))
         predictions <- predict(fit, newdata=f1)
         l1 <- as.numeric(d1$V1)

         residuals <- l1 - predictions
         > mean(residuals^2)           # MSE
         [1] 0.00985449
         > mean(abs(residuals))        # MAD
         [1] 0.07961668
         > cor(predictions, l1)^2   # r^2
         [1] 0.9998737

         > summary(fit)

          Call:
          glm(formula = V1 ~ V2 + V3, family = "gaussian", data = d1)

          Deviance Residuals:
               Min        1Q    Median        3Q       Max
          -0.47082  -0.06797   0.00002   0.06725   0.34635

          Coefficients:
                       Estimate Std. Error t value Pr(>|t|)
          (Intercept) 6.3022157  0.0018600    3388   <2e-16 ***
          V2          4.6982442  0.0011805    3980   <2e-16 ***
          V3          7.1994344  0.0009044    7961   <2e-16 ***
          ---

          ....
       */
      assert(model.summary.meanSquaredError ~== 0.00985449 relTol 1E-4)
      assert(model.summary.meanAbsoluteError ~== 0.07961668 relTol 1E-4)
      assert(model.summary.r2 ~== 0.9998737 relTol 1E-4)

      // Normal solver uses "WeightedLeastSquares". This algorithm does not generate
      // objective history because it does not run through iterations.
      if (solver == "l-bfgs") {
        // Objective function should be monotonically decreasing for linear regression
        assert(
          model.summary
            .objectiveHistory
            .sliding(2)
            .forall(x => x(0) >= x(1)))
      } else {
        // To clalify that the normal solver is used here.
        assert(model.summary.objectiveHistory.length == 1)
        assert(model.summary.objectiveHistory(0) == 0.0)
        val devianceResidualsR = Array(-0.47082, 0.34635)
        val seCoefR = Array(0.0011805, 0.0009044, 0.0018600)
        val tValsR = Array(3980, 7961, 3388)
        val pValsR = Array(0, 0, 0)
        model.summary.devianceResiduals.zip(devianceResidualsR).foreach { x =>
          assert(x._1 ~== x._2 absTol 1E-4) }
        model.summary.coefficientStandardErrors.zip(seCoefR).foreach{ x =>
          assert(x._1 ~== x._2 absTol 1E-4) }
        model.summary.tValues.map(_.round).zip(tValsR).foreach{ x => assert(x._1 === x._2) }
        model.summary.pValues.map(_.round).zip(pValsR).foreach{ x => assert(x._1 === x._2) }
      }
    }
  }

  test("linear regression model testset evaluation summary") {
    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      val trainer = new LinearRegression().setSolver(solver)
      val model = trainer.fit(datasetWithDenseFeature)

      // Evaluating on training dataset should yield results summary equal to training summary
      val testSummary = model.evaluate(datasetWithDenseFeature)
      assert(model.summary.meanSquaredError ~== testSummary.meanSquaredError relTol 1E-5)
      assert(model.summary.r2 ~== testSummary.r2 relTol 1E-5)
      model.summary.residuals.select("residuals").collect()
        .zip(testSummary.residuals.select("residuals").collect())
        .forall { case (Row(r1: Double), Row(r2: Double)) => r1 ~== r2 relTol 1E-5 }
    }
  }

  test("linear regression with weighted samples") {
    Seq("auto", "l-bfgs", "normal").foreach { solver =>
      val (data, weightedData) = {
        val activeData = LinearDataGenerator.generateLinearInput(
          6.3, Array(4.7, 7.2), Array(0.9, -1.3), Array(0.7, 1.2), 500, 1, 0.1)

        val rnd = new Random(8392)
        val signedData = activeData.map { case p: LabeledPoint =>
          (rnd.nextGaussian() > 0.0, p)
        }

        val data1 = signedData.flatMap {
          case (true, p) => Iterator(p, p)
          case (false, p) => Iterator(p)
        }

        val weightedSignedData = signedData.flatMap {
          case (true, LabeledPoint(label, features)) =>
            Iterator(
              Instance(label, weight = 1.2, features),
              Instance(label, weight = 0.8, features)
            )
          case (false, LabeledPoint(label, features)) =>
            Iterator(
              Instance(label, weight = 0.3, features),
              Instance(label, weight = 0.1, features),
              Instance(label, weight = 0.6, features)
            )
        }

        val noiseData = LinearDataGenerator.generateLinearInput(
          2, Array(1, 3), Array(0.9, -1.3), Array(0.7, 1.2), 500, 1, 0.1)
        val weightedNoiseData = noiseData.map {
          case LabeledPoint(label, features) => Instance(label, weight = 0, features)
        }
        val data2 = weightedSignedData ++ weightedNoiseData

        (sqlContext.createDataFrame(sc.parallelize(data1, 4)),
          sqlContext.createDataFrame(sc.parallelize(data2, 4)))
      }

      val trainer1a = (new LinearRegression).setFitIntercept(true)
        .setElasticNetParam(0.0).setRegParam(0.21).setStandardization(true).setSolver(solver)
      val trainer1b = (new LinearRegression).setFitIntercept(true).setWeightCol("weight")
        .setElasticNetParam(0.0).setRegParam(0.21).setStandardization(true).setSolver(solver)

      // Normal optimizer is not supported with non-zero elasticnet parameter.
      val model1a0 = trainer1a.fit(data)
      val model1a1 = trainer1a.fit(weightedData)
      val model1b = trainer1b.fit(weightedData)

      assert(model1a0.coefficients !~= model1a1.coefficients absTol 1E-3)
      assert(model1a0.intercept !~= model1a1.intercept absTol 1E-3)
      assert(model1a0.coefficients ~== model1b.coefficients absTol 1E-3)
      assert(model1a0.intercept ~== model1b.intercept absTol 1E-3)

      val trainer2a = (new LinearRegression).setFitIntercept(true)
        .setElasticNetParam(0.0).setRegParam(0.21).setStandardization(false).setSolver(solver)
      val trainer2b = (new LinearRegression).setFitIntercept(true).setWeightCol("weight")
        .setElasticNetParam(0.0).setRegParam(0.21).setStandardization(false).setSolver(solver)
      val model2a0 = trainer2a.fit(data)
      val model2a1 = trainer2a.fit(weightedData)
      val model2b = trainer2b.fit(weightedData)
      assert(model2a0.coefficients !~= model2a1.coefficients absTol 1E-3)
      assert(model2a0.intercept !~= model2a1.intercept absTol 1E-3)
      assert(model2a0.coefficients ~== model2b.coefficients absTol 1E-3)
      assert(model2a0.intercept ~== model2b.intercept absTol 1E-3)

      val trainer3a = (new LinearRegression).setFitIntercept(false)
        .setElasticNetParam(0.0).setRegParam(0.21).setStandardization(true).setSolver(solver)
      val trainer3b = (new LinearRegression).setFitIntercept(false).setWeightCol("weight")
        .setElasticNetParam(0.0).setRegParam(0.21).setStandardization(true).setSolver(solver)
      val model3a0 = trainer3a.fit(data)
      val model3a1 = trainer3a.fit(weightedData)
      val model3b = trainer3b.fit(weightedData)
      assert(model3a0.coefficients !~= model3a1.coefficients absTol 1E-3)
      assert(model3a0.coefficients ~== model3b.coefficients absTol 1E-3)

      val trainer4a = (new LinearRegression).setFitIntercept(false)
        .setElasticNetParam(0.0).setRegParam(0.21).setStandardization(false).setSolver(solver)
      val trainer4b = (new LinearRegression).setFitIntercept(false).setWeightCol("weight")
        .setElasticNetParam(0.0).setRegParam(0.21).setStandardization(false).setSolver(solver)
      val model4a0 = trainer4a.fit(data)
      val model4a1 = trainer4a.fit(weightedData)
      val model4b = trainer4b.fit(weightedData)
      assert(model4a0.coefficients !~= model4a1.coefficients absTol 1E-3)
      assert(model4a0.coefficients ~== model4b.coefficients absTol 1E-3)
    }
  }

  test("linear regression model with l-bfgs with big feature datasets") {
    val trainer = new LinearRegression().setSolver("auto")
    val model = trainer.fit(datasetWithSparseFeature)

    // Training results for the model should be available
    assert(model.hasSummary)
    // When LBFGS is used as optimizer, objective history can be restored.
    assert(
      model.summary
        .objectiveHistory
        .sliding(2)
        .forall(x => x(0) >= x(1)))
  }

  test("linear regression summary with weighted samples and intercept by normal solver") {
    /*
       R code:

       model <- glm(formula = "b ~ .", data = df, weights = w)
       summary(model)

       Call:
       glm(formula = "b ~ .", data = df, weights = w)

       Deviance Residuals:
            1       2       3       4
        1.920  -1.358  -1.109   0.960

       Coefficients:
                   Estimate Std. Error t value Pr(>|t|)
       (Intercept)   18.080      9.608   1.882    0.311
       V1             6.080      5.556   1.094    0.471
       V2            -0.600      1.960  -0.306    0.811

       (Dispersion parameter for gaussian family taken to be 7.68)

           Null deviance: 202.00  on 3  degrees of freedom
       Residual deviance:   7.68  on 1  degrees of freedom
       AIC: 18.783

       Number of Fisher Scoring iterations: 2
     */

    val model = new LinearRegression()
      .setWeightCol("weight")
      .setSolver("normal")
      .fit(datasetWithWeight)
    val coefficientsR = Vectors.dense(Array(6.080, -0.600))
    val interceptR = 18.080
    val devianceResidualsR = Array(-1.358, 1.920)
    val seCoefR = Array(5.556, 1.960, 9.608)
    val tValsR = Array(1.094, -0.306, 1.882)
    val pValsR = Array(0.471, 0.811, 0.311)

    assert(model.coefficients ~== coefficientsR absTol 1E-3)
    assert(model.intercept ~== interceptR absTol 1E-3)
    model.summary.devianceResiduals.zip(devianceResidualsR).foreach { x =>
      assert(x._1 ~== x._2 absTol 1E-3) }
    model.summary.coefficientStandardErrors.zip(seCoefR).foreach{ x =>
      assert(x._1 ~== x._2 absTol 1E-3) }
    model.summary.tValues.zip(tValsR).foreach{ x => assert(x._1 ~== x._2 absTol 1E-3) }
    model.summary.pValues.zip(pValsR).foreach{ x => assert(x._1 ~== x._2 absTol 1E-3) }
  }

  test("linear regression summary with weighted samples and w/o intercept by normal solver") {
    /*
       R code:

       model <- glm(formula = "b ~ . -1", data = df, weights = w)
       summary(model)

       Call:
       glm(formula = "b ~ . -1", data = df, weights = w)

       Deviance Residuals:
            1       2       3       4
        1.950   2.344  -4.600   2.103

       Coefficients:
          Estimate Std. Error t value Pr(>|t|)
       V1  -3.7271     2.9032  -1.284   0.3279
       V2   3.0100     0.6022   4.998   0.0378 *
       ---
       Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

       (Dispersion parameter for gaussian family taken to be 17.4376)

           Null deviance: 5962.000  on 4  degrees of freedom
       Residual deviance:   34.875  on 2  degrees of freedom
       AIC: 22.835

       Number of Fisher Scoring iterations: 2
     */

    val model = new LinearRegression()
      .setWeightCol("weight")
      .setSolver("normal")
      .setFitIntercept(false)
      .fit(datasetWithWeight)
    val coefficientsR = Vectors.dense(Array(-3.7271, 3.0100))
    val interceptR = 0.0
    val devianceResidualsR = Array(-4.600, 2.344)
    val seCoefR = Array(2.9032, 0.6022)
    val tValsR = Array(-1.284, 4.998)
    val pValsR = Array(0.3279, 0.0378)

    assert(model.coefficients ~== coefficientsR absTol 1E-3)
    assert(model.intercept === interceptR)
    model.summary.devianceResiduals.zip(devianceResidualsR).foreach { x =>
      assert(x._1 ~== x._2 absTol 1E-3) }
    model.summary.coefficientStandardErrors.zip(seCoefR).foreach{ x =>
      assert(x._1 ~== x._2 absTol 1E-3) }
    model.summary.tValues.zip(tValsR).foreach{ x => assert(x._1 ~== x._2 absTol 1E-3) }
    model.summary.pValues.zip(pValsR).foreach{ x => assert(x._1 ~== x._2 absTol 1E-3) }
  }

  test("read/write") {
    def checkModelData(model: LinearRegressionModel, model2: LinearRegressionModel): Unit = {
      assert(model.intercept === model2.intercept)
      assert(model.coefficients === model2.coefficients)
    }
    val lr = new LinearRegression()
    testEstimatorAndModelReadWrite(lr, datasetWithWeight, LinearRegressionSuite.allParamSettings,
      checkModelData)
  }
}

object LinearRegressionSuite {

  /**
   * Mapping from all Params to valid settings which differ from the defaults.
   * This is useful for tests which need to exercise all Params, such as save/load.
   * This excludes input columns to simplify some tests.
   */
  val allParamSettings: Map[String, Any] = Map(
    "predictionCol" -> "myPrediction",
    "regParam" -> 0.01,
    "elasticNetParam" -> 0.1,
    "maxIter" -> 2,  // intentionally small
    "fitIntercept" -> true,
    "tol" -> 0.8,
    "standardization" -> false,
    "solver" -> "l-bfgs"
  )
}