If I drop the last feature ("lat_plus_long") from the third model, the error seems to go away. On Wed, Oct 12, 2016 at 11:52 PM, Meeraj Kunnumpurath <mee...@servicesymphony.com> wrote:
> Hello, > > I have some code trying to compare linear regression coefficients with > three sets of features, as shown below. On the third one, I get an > assertion error. > > This is the code, > > object MultipleRegression extends App { > > > > val spark = SparkSession.builder().appName("Regression Model > Builder").master("local").getOrCreate() > > import spark.implicits._ > > val training = build("kc_house_train_data.csv", "train", spark) > val test = build("kc_house_test_data.csv", "test", spark) > > val lr = new LinearRegression() > > val m1 = lr.fit(training.map(r => buildLp(r, "sqft_living", "bedrooms", > "bathrooms", "lat", "long"))) > println(s"Coefficients: ${m1.coefficients}, Intercept: ${m1.intercept}") > > val m2 = lr.fit(training.map(r => buildLp(r, "sqft_living", "bedrooms", > "bathrooms", "lat", "long", "bed_bath_rooms"))) > println(s"Coefficients: ${m2.coefficients}, Intercept: ${m2.intercept}") > > val m3 = lr.fit(training.map(r => buildLp(r, "sqft_living", "bedrooms", > "bathrooms", "lat", "long", "bed_bath_rooms", "bedrooms_squared", > "log_sqft_living", "lat_plus_long"))) > println(s"Coefficients: ${m3.coefficients}, Intercept: ${m3.intercept}") > > > def build(path: String, view: String, spark: SparkSession) = { > > val toDouble = udf((x: String) => x.toDouble) > val product = udf((x: Double, y: Double) => x * y) > val sum = udf((x: Double, y: Double) => x + y) > val log = udf((x: Double) => scala.math.log(x)) > > spark.read. > option("header", "true"). > csv(path). > withColumn("sqft_living", toDouble('sqft_living)). > withColumn("price", toDouble('price)). > withColumn("bedrooms", toDouble('bedrooms)). > withColumn("bathrooms", toDouble('bathrooms)). > withColumn("lat", toDouble('lat)). > withColumn("long", toDouble('long)). > withColumn("bedrooms_squared", product('bedrooms, 'bedrooms)). > withColumn("bed_bath_rooms", product('bedrooms, 'bathrooms)). > withColumn("lat_plus_long", sum('lat, 'long)). 
> withColumn("log_sqft_living", log('sqft_living)) > > } > > def buildLp(r: Row, input: String*) = { > var features = input.map(r.getAs[Double](_)).toArray > new LabeledPoint(r.getAs[Double]("price"), Vectors.dense(features)) > } > > } > > > This is the error I get. > > Exception in thread "main" java.lang.AssertionError: assertion failed: > lapack.dppsv returned 9. > at scala.Predef$.assert(Predef.scala:170) > at org.apache.spark.mllib.linalg.CholeskyDecomposition$.solve( > CholeskyDecomposition.scala:40) > at org.apache.spark.ml.optim.WeightedLeastSquares.fit( > WeightedLeastSquares.scala:140) > at org.apache.spark.ml.regression.LinearRegression. > train(LinearRegression.scala:180) > at org.apache.spark.ml.regression.LinearRegression. > train(LinearRegression.scala:70) > at org.apache.spark.ml.Predictor.fit(Predictor.scala:90) > at com.ss.ml.regression.MultipleRegression$.delayedEndpoint$com$ss$ml$ > regression$MultipleRegression$1(MultipleRegression.scala:36) > at com.ss.ml.regression.MultipleRegression$delayedInit$body.apply( > MultipleRegression.scala:12) > at scala.Function0$class.apply$mcV$sp(Function0.scala:34) > at scala.runtime.AbstractFunction0.apply$mcV$ > sp(AbstractFunction0.scala:12) > at scala.App$$anonfun$main$1.apply(App.scala:76) > at scala.App$$anonfun$main$1.apply(App.scala:76) > at scala.collection.immutable.List.foreach(List.scala:381) > at scala.collection.generic.TraversableForwarder$class. 
> foreach(TraversableForwarder.scala:35) > at scala.App$class.main(App.scala:76) > at com.ss.ml.regression.MultipleRegression$.main( > MultipleRegression.scala:12) > at com.ss.ml.regression.MultipleRegression.main(MultipleRegression.scala) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at sun.reflect.NativeMethodAccessorImpl.invoke( > NativeMethodAccessorImpl.java:62) > at sun.reflect.DelegatingMethodAccessorImpl.invoke( > DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:483) > at com.intellij.rt.execution.application.AppMain.main(AppMain.java:140) > > > Does anyone know what is going wrong here? > > Many thanks > > -- > *Meeraj Kunnumpurath* > > > *Director and Executive Principal, Service Symphony Ltd, 00 44 7702 693597* > > *00 971 50 409 0169, mee...@servicesymphony.com <mee...@servicesymphony.com>* > -- *Meeraj Kunnumpurath* *Director and Executive Principal, Service Symphony Ltd, 00 44 7702 693597* *00 971 50 409 0169, mee...@servicesymphony.com <mee...@servicesymphony.com>*