Hello,

I have some code trying to compare linear regression coefficients with
three sets of features, as shown below. On the third one, I get an
assertion error.

This is the code,

object MultipleRegression extends App {



  val spark = SparkSession.builder().appName("Regression Model
Builder").master("local").getOrCreate()

  import spark.implicits._

  val training = build("kc_house_train_data.csv", "train", spark)
  val test = build("kc_house_test_data.csv", "test", spark)

  val lr = new LinearRegression()

  val m1 = lr.fit(training.map(r => buildLp(r, "sqft_living",
"bedrooms", "bathrooms", "lat", "long")))
  println(s"Coefficients: ${m1.coefficients}, Intercept: ${m1.intercept}")

  val m2 = lr.fit(training.map(r => buildLp(r, "sqft_living",
"bedrooms", "bathrooms", "lat", "long", "bed_bath_rooms")))
  println(s"Coefficients: ${m2.coefficients}, Intercept: ${m2.intercept}")

  val m3 = lr.fit(training.map(r => buildLp(r, "sqft_living",
"bedrooms", "bathrooms", "lat", "long", "bed_bath_rooms",
"bedrooms_squared", "log_sqft_living", "lat_plus_long")))
  println(s"Coefficients: ${m3.coefficients}, Intercept: ${m3.intercept}")


  def build(path: String, view: String, spark: SparkSession) = {

    val toDouble = udf((x: String) => x.toDouble)
    val product = udf((x: Double, y: Double) => x * y)
    val sum = udf((x: Double, y: Double) => x + y)
    val log = udf((x: Double) => scala.math.log(x))

    spark.read.
      option("header", "true").
      csv(path).
      withColumn("sqft_living", toDouble('sqft_living)).
      withColumn("price", toDouble('price)).
      withColumn("bedrooms", toDouble('bedrooms)).
      withColumn("bathrooms", toDouble('bathrooms)).
      withColumn("lat", toDouble('lat)).
      withColumn("long", toDouble('long)).
      withColumn("bedrooms_squared", product('bedrooms, 'bedrooms)).
      withColumn("bed_bath_rooms", product('bedrooms, 'bathrooms)).
      withColumn("lat_plus_long", sum('lat, 'long)).
      withColumn("log_sqft_living", log('sqft_living))

  }

  def buildLp(r: Row, input: String*) = {
    var features = input.map(r.getAs[Double](_)).toArray
    new LabeledPoint(r.getAs[Double]("price"), Vectors.dense(features))
  }

}


This is the error I get.

Exception in thread "main" java.lang.AssertionError: assertion failed:
lapack.dppsv returned 9.
at scala.Predef$.assert(Predef.scala:170)
at
org.apache.spark.mllib.linalg.CholeskyDecomposition$.solve(CholeskyDecomposition.scala:40)
at
org.apache.spark.ml.optim.WeightedLeastSquares.fit(WeightedLeastSquares.scala:140)
at
org.apache.spark.ml.regression.LinearRegression.train(LinearRegression.scala:180)
at
org.apache.spark.ml.regression.LinearRegression.train(LinearRegression.scala:70)
at org.apache.spark.ml.Predictor.fit(Predictor.scala:90)
at
com.ss.ml.regression.MultipleRegression$.delayedEndpoint$com$ss$ml$regression$MultipleRegression$1(MultipleRegression.scala:36)
at
com.ss.ml.regression.MultipleRegression$delayedInit$body.apply(MultipleRegression.scala:12)
at scala.Function0$class.apply$mcV$sp(Function0.scala:34)
at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:12)
at scala.App$$anonfun$main$1.apply(App.scala:76)
at scala.App$$anonfun$main$1.apply(App.scala:76)
at scala.collection.immutable.List.foreach(List.scala:381)
at
scala.collection.generic.TraversableForwarder$class.foreach(TraversableForwarder.scala:35)
at scala.App$class.main(App.scala:76)
at
com.ss.ml.regression.MultipleRegression$.main(MultipleRegression.scala:12)
at com.ss.ml.regression.MultipleRegression.main(MultipleRegression.scala)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:483)
at com.intellij.rt.execution.application.AppMain.main(AppMain.java:140)


Does anyone know what is going wrong here?

Many thanks

-- 
*Meeraj Kunnumpurath*


*Director and Executive PrincipalService Symphony Ltd00 44 7702 693597*

*00 971 50 409 0169mee...@servicesymphony.com <mee...@servicesymphony.com>*

Reply via email to