import com.google.gson.{GsonBuilder, JsonParser}
import org.apache.spark.mllib.clustering.KMeans
import org.apache.spark.sql.SQLContext
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.clustering.KMeans

/**
 * Examines the collected tweets and trains a k-means model based on them.
 *
 * The input path, output path, cluster count and iteration count are
 * hard-coded inside [[ExamineAndTrain.main]].
 */
object ExamineAndTrain {

  // Not referenced by main in this file; kept because they are public members
  // of the object and may be used from elsewhere.
  val jsonParser = new JsonParser()
  val gson = new GsonBuilder().setPrettyPrinting().create()

  /**
   * Reads the tweet text file, featurizes each line, trains a k-means model,
   * saves the cluster centers as an object file, and prints a couple of
   * example tweets under the cluster the model assigns them to.
   *
   * @param args command-line arguments (currently unused)
   */
  def main(args: Array[String]): Unit = {
    val outputModelDir = "C:\\outputmode111"
    val tweetInput = "C:\\test"
    val numClusters = 10
    val numIterations = 20

    val conf = new SparkConf()
      .setAppName(this.getClass.getSimpleName)
      .setMaster("local[4]")
      .set("spark.executor.memory", "1g")
    val sc = new SparkContext(conf)

    // Always stop the context, even when training or saving fails, so the
    // SparkContext is not left running.
    try {
      val tweets = sc.textFile(tweetInput)

      // Utils.featurize is defined outside this file; it is presumably
      // String => Vector, since its output is passed to KMeans.train.
      val vectors = tweets.map(Utils.featurize).cache()
      // Calls an action on the RDD to populate the vectors cache.
      vectors.count()

      val model = KMeans.train(vectors, numClusters, numIterations)
      sc.makeRDD(model.clusterCenters, numClusters).saveAsObjectFile(outputModelDir)

      // Predict each sample tweet's cluster once, instead of re-featurizing
      // and re-predicting it on every iteration of the cluster loop below.
      val someTweets = tweets.take(2)
      val assignments = someTweets.map(tweet => tweet -> model.predict(Utils.featurize(tweet)))

      println("----Example tweets from the clusters")
      for (i <- 0 until numClusters) {
        println(s"\nCLUSTER $i:")
        assignments.foreach { case (tweet, cluster) =>
          if (cluster == i) {
            println(tweet)
          }
        }
      }
    } finally {
      sc.stop()
    }
  }
}

From: lovelylavs [via Apache Spark User List] [mailto:ml-node+s1001560n21956...@n3.nabble.com]
Sent: Sunday, March 08, 2015 2:34 AM
To: Jishnu Menath Prathap (WT01 - BAS)
Subject: Re: Spark SQL Stackoverflow error

Thank you so much for your reply. If it is possible can you please provide me with the code? Thank you so much. Lavanya.
________________________________
From: Jishnu Prathap [via Apache Spark User List] <ml-node+[hidden email]</user/SendEmail.jtp?type=node&node=21956&i=0>>
Sent: Sunday, March 1, 2015 3:03 AM
To: Nadikuda, Lavanya
Subject: RE: Spark SQL Stackoverflow error

Hi The Issue was not fixed . I removed the between sql layer and directly created features from the file. Regards Jishnu Prathap

From: lovelylavs [via Apache Spark User List] [mailto:ml-node+[hidden email]</user/SendEmail.jtp?type=node&node=21863&i=0>]
Sent: Sunday, March 01, 2015 4:44 AM
To: Jishnu Menath Prathap (WT01 - BAS)
Subject: Re: Spark SQL Stackoverflow error

Hi, how was this issue fixed?
________________________________
If you reply to this email, your message will be added to the discussion below: http://apache-spark-user-list.1001560.n3.nabble.com/Spark-SQL-Stackoverflow-error-tp12086p21862.html To unsubscribe from Spark SQL Stackoverflow error, click here.
NAML<http://apache-spark-user-list.1001560.n3.nabble.com/template/NamlServlet.jtp?macro=macro_viewer&id=instant_html%21nabble%3Aemail.naml&base=nabble.naml.namespaces.BasicNamespace-nabble.view.web.template.NabbleNamespace-nabble.view.web.template.NodeNamespace&breadcrumbs=notify_subscribers%21nabble%3Aemail.naml-instant_emails%21nabble%3Aemail.naml-send_instant_email%21nabble%3Aemail.naml> The information contained in this electronic message and any attachments to this message are intended for the exclusive use of the addressee(s) and may contain proprietary, confidential or privileged information. If you are not the intended recipient, you should not disseminate, distribute or copy this e-mail. Please notify the sender immediately and destroy all copies of this message and any attachments. WARNING: Computer viruses can be transmitted via email. The recipient should check this email and any attachments for the presence of viruses. The company accepts no liability for any damage caused by any virus transmitted by this email. www.wipro.com<http://www.wipro.com> ________________________________ If you reply to this email, your message will be added to the discussion below: http://apache-spark-user-list.1001560.n3.nabble.com/Spark-SQL-Stackoverflow-error-tp12086p21863.html To unsubscribe from Spark SQL Stackoverflow error, click here. 
NAML<http://apache-spark-user-list.1001560.n3.nabble.com/template/NamlServlet.jtp?macro=macro_viewer&id=instant_html%21nabble%3Aemail.naml&base=nabble.naml.namespaces.BasicNamespace-nabble.view.web.template.NabbleNamespace-nabble.view.web.template.NodeNamespace&breadcrumbs=notify_subscribers%21nabble%3Aemail.naml-instant_emails%21nabble%3Aemail.naml-send_instant_email%21nabble%3Aemail.naml> ________________________________ If you reply to this email, your message will be added to the discussion below: http://apache-spark-user-list.1001560.n3.nabble.com/Spark-SQL-Stackoverflow-error-tp12086p21956.html To unsubscribe from Spark SQL Stackoverflow error, click here<http://apache-spark-user-list.1001560.n3.nabble.com/template/NamlServlet.jtp?macro=unsubscribe_by_code&node=12086&code=amlzaG51LnByYXRoYXBAd2lwcm8uY29tfDEyMDg2fC0xNzUwOTc3MjE3>. NAML<http://apache-spark-user-list.1001560.n3.nabble.com/template/NamlServlet.jtp?macro=macro_viewer&id=instant_html%21nabble%3Aemail.naml&base=nabble.naml.namespaces.BasicNamespace-nabble.view.web.template.NabbleNamespace-nabble.view.web.template.NodeNamespace&breadcrumbs=notify_subscribers%21nabble%3Aemail.naml-instant_emails%21nabble%3Aemail.naml-send_instant_email%21nabble%3Aemail.naml> The information contained in this electronic message and any attachments to this message are intended for the exclusive use of the addressee(s) and may contain proprietary, confidential or privileged information. If you are not the intended recipient, you should not disseminate, distribute or copy this e-mail. Please notify the sender immediately and destroy all copies of this message and any attachments. WARNING: Computer viruses can be transmitted via email. The recipient should check this email and any attachments for the presence of viruses. The company accepts no liability for any damage caused by any virus transmitted by this email. www.wipro.com