Now I get this (below); clarifying the instructions would definitely help. Let me know if
I can help.

import org.apache.mahout.math._
import org.apache.mahout.math.scalabindings._
import org.apache.mahout.math.drm._
import org.apache.mahout.math.scalabindings.RLikeOps._
import org.apache.mahout.math.drm.RLikeDrmOps._
import org.apache.mahout.sparkbindings._
java.lang.NoClassDefFoundError: org/apache/mahout/math/AbstractMatrix
at
org.apache.mahout.sparkbindings.SparkDistributedContext.<init>(SparkDistributedContext.scala:25)
at org.apache.mahout.sparkbindings.package$.sc2sdc(package.scala:98)
at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:59)
at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:64)
at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:66)
at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:68)
at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:70)
at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:72)
at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:74)
at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:76)
at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:78)
at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:80)
at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:82)
at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:84)
at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:86)
at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:88)
at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:90)
at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:92)
at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:94)
at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:96)
at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:98)
at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:100)
at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:102)
at
$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:104)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:106)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:108)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:110)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:112)
at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:114)
at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:116)
at $iwC$$iwC$$iwC$$iwC.<init>(<console>:118)
at $iwC$$iwC$$iwC.<init>(<console>:120)
at $iwC$$iwC.<init>(<console>:122)
at $iwC.<init>(<console>:124)
at <init>(<console>:126)
at .<init>(<console>:130)
at .<clinit>(<console>)
at .<init>(<console>:7)
at .<clinit>(<console>)
at $print(<console>)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at
sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at
sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:497)
at
org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
at
org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1338)
at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
at
org.apache.zeppelin.spark.SparkInterpreter.interpretInput(SparkInterpreter.java:812)
at
org.apache.zeppelin.spark.SparkInterpreter.interpret(SparkInterpreter.java:755)
at
org.apache.zeppelin.spark.SparkInterpreter.interpret(SparkInterpreter.java:748)
at
org.apache.zeppelin.interpreter.ClassloaderInterpreter.interpret(ClassloaderInterpreter.java:57)
at
org.apache.zeppelin.interpreter.LazyOpenInterpreter.interpret(LazyOpenInterpreter.java:93)
at
org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer$InterpretJob.jobRun(RemoteInterpreterServer.java:331)
at org.apache.zeppelin.scheduler.Job.run(Job.java:171)
at org.apache.zeppelin.scheduler.FIFOScheduler$1.run(FIFOScheduler.java:139)
at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
at java.util.concurrent.FutureTask.run(FutureTask.java:266)
at
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
at
java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
at
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
at
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
at java.lang.Thread.run(Thread.java:745)
Caused by: java.lang.ClassNotFoundException:
org.apache.mahout.math.AbstractMatrix
at java.net.URLClassLoader.findClass(URLClassLoader.java:381)
at java.lang.ClassLoader.loadClass(ClassLoader.java:424)
at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:331)
at java.lang.ClassLoader.loadClass(ClassLoader.java:357)
... 64 more

On Fri, May 20, 2016 at 3:41 PM, Andrew Musselman <
andrew.mussel...@gmail.com> wrote:

> Oh might have been a browser cache issue; even after a couple hard refresh
> methods using another browser has the import link.
>
> On Fri, May 20, 2016 at 3:36 PM, Andrew Musselman <
> andrew.mussel...@gmail.com> wrote:
>
>> Trevor, my zeppelin source is at this version:
>>
>>   <groupId>org.apache.zeppelin</groupId>
>>   <artifactId>zeppelin</artifactId>
>>   <packaging>pom</packaging>
>>   <version>0.6.0-incubating-SNAPSHOT</version>
>>   <name>Zeppelin</name>
>>   <description>Zeppelin project</description>
>>   <url>http://zeppelin.incubator.apache.org/</url>
>>
>> And yes you're right the artifacts weren't added to the dependencies; is
>> that a feature in more modern zep?
>>
>> On Fri, May 20, 2016 at 3:02 PM, Dmitriy Lyubimov <dlie...@gmail.com>
>> wrote:
>>
>>> no parenthesis.
>>>
>>> import o.a.m.sparkbindings._
>>> ....
>>> myRdd = myDrm.rdd
>>>
>>>
>>> On Fri, May 20, 2016 at 2:57 PM, Suneel Marthi <smar...@apache.org>
>>> wrote:
>>>
>>> > On Fri, May 20, 2016 at 3:18 PM, Trevor Grant <
>>> trevor.d.gr...@gmail.com>
>>> > wrote:
>>> >
>>> > > Hey Pat,
>>> > >
>>> > > If you spit out a TSV - you can import into pyspark / matplotlib
>>> from the
>>> > > resource pool in essentially the same way and use that plotting
>>> library
>>> > if
>>> > > you prefer.  In fact you could import the tsv into pandas and use
>>> all of
>>> > > the pandas plotting as well (though I think it is for the most part,
>>> also
>>> > > matplotlib with some convenience functions).
>>> > >
>>> > >
>>> > >
>>> >
>>> https://www.zeppelinhub.com/viewer/notebooks/aHR0cHM6Ly9yYXcuZ2l0aHVidXNlcmNvbnRlbnQuY29tL2ZlbGl4Y2hldW5nL3NwYXJrLW5vdGVib29rLWV4YW1wbGVzL21hc3Rlci9aZXBwZWxpbl9ub3RlYm9vay8yQU1YNUNWQ1Uvbm90ZS5qc29u
>>> > >
>>> > > In Zeppelin, unless you specify otherwise, pyspark, sparkr,
>>> spark-sql,
>>> > and
>>> > > scala-spark all share the same spark context you can create RDDs in
>>> one
>>> > > language and access them / work on them in another (so I understand).
>>> > >
>>> > > So in Mahout can you "save" a matrix as a RDD? e.g. something like
>>> > >
>>> > > val myRDD = myDRM.asRDD()
>>> > >
>>> >
>>> > val myRDD = myDRM.rdd()
>>> >
>>> > >
>>> > > And would 'myRDD' then exist in the spark context?
>>> > >
>>> > > yes it will be in sparkContext
>>> >
>>> > >
>>> > > Trevor Grant
>>> > > Data Scientist
>>> > > https://github.com/rawkintrevo
>>> > > http://stackexchange.com/users/3002022/rawkintrevo
>>> > > http://trevorgrant.org
>>> > >
>>> > > *"Fortunate is he, who is able to know the causes of things."
>>> -Virgil*
>>> > >
>>> > >
>>> > > On Fri, May 20, 2016 at 12:21 PM, Pat Ferrel <p...@occamsmachete.com>
>>> > > wrote:
>>> > >
>>> > > > Agreed.
>>> > > >
>>> > > > BTW I don’t want to stall progress but being the most ignorant of
>>> plot
>>> > > > libs, I’ll ask if we should consider python and matplotlib. In
>>> another
>>> > > > project we use python because of the RDD support on Spark though
>>> the
>>> > > > visualizations are extremely limited in our case. If we can pass
>>> an RDD
>>> > > to
>>> > > > pyspark it would allow custom reductions in python before plotting,
>>> > even
>>> > > > though we will support many natively in Mahout. I’m guessing that
>>> this
>>> > > > would cross a context boundary and require a write to disk?
>>> > > >
>>> > > > So 2 questions:
>>> > > > 1) what does the inter language support look like with Spark
>>> python vs
>>> > > > SparkR, can we transfer RDDs?
>>> > > > 2) are the plot libs significantly different?
>>> > > >
>>> > > > On May 20, 2016, at 9:54 AM, Trevor Grant <
>>> trevor.d.gr...@gmail.com>
>>> > > > wrote:
>>> > > >
>>> > > > Dmitriy really nailed it on the head in his reply to the post which
>>> > I'll
>>> > > > rebroadcast below. In essence the whole reason you are
>>> (theoretically)
>>> > > > using Mahout is that the data is too big to fit in memory.  If it's too
>>> big to
>>> > > fit
>>> > > > in memory, well then it's probably too big to plot each point (e.g.
>>> > > > trillions of rows, you only have so many pixels).   For the example
>>> I
>>> > > > randomly sampled a matrix.
>>> > > >
>>> > > > So as Dmitriy says, in Mahout we need to have functions that will
>>> > > > 'preprocess' the data into something plottable.
>>> > > >
>>> > > > For the Zeppelin-Plotting thing, we need to have a function that
>>> will
>>> > spit
>>> > > > out a tsv like string of the data we wanted plotted.
>>> > > >
>>> > > > I agree an honest Mahout interpreter in Zeppelin is probably worth
>>> > doing.
>>> > > > There are a couple of ways to go about it. I opened up the
>>> discussion
>>> > on
>>> > > > dev@Zeppelin and didn't get any replies. I'm going to take that to
>>> > mean
>>> > > we
>>> > > > can do it in a way that makes the most sense to Mahout users...
>>> > > >
>>> > > > First steps are to include some methods in Mahout that will do that
>>> > > > preprocessing, and one that will turn something into a tsv string.
>>> > > >
>>> > > > I have some general ideas on possible approaches to making an
>>> > > honest-mahout
>>> > > > interpreter but I want to play in the code and look at the
>>> Flink-Mahout
>>> > > > shell a bit before I try to organize my thoughts and present them.
>>> > > >
>>> > > > ...(2) not sure what is the point of supporting distributed
>>> anything.
>>> > It
>>> > > is
>>> > > > distributed presumably because it is hard to keep it in memory.
>>> > > Therefore,
>>> > > > plotting anything distributed potentially presents 2 problems:
>>> storage
>>> > > > space and overplotting due to number of points. The idea is that we
>>> > have
>>> > > to
>>> > > > work out algorithms that condense big data information into small
>>> > > plottable
>>> > > > information (like density grids, for example, or histograms)....
>>> > > >
>>> > > > Trevor Grant
>>> > > > Data Scientist
>>> > > > https://github.com/rawkintrevo
>>> > > > http://stackexchange.com/users/3002022/rawkintrevo
>>> > > > http://trevorgrant.org
>>> > > >
>>> > > > *"Fortunate is he, who is able to know the causes of things."
>>> -Virgil*
>>> > > >
>>> > > >
>>> > > > On Fri, May 20, 2016 at 10:22 AM, Pat Ferrel <
>>> p...@occamsmachete.com>
>>> > > > wrote:
>>> > > >
>>> > > > > Great job Trevor, we’ll need this detail to smooth out the sharp
>>> > edges
>>> > > > and
>>> > > > > any guidance from you or the Zeppelin community will be a big
>>> help.
>>> > > > >
>>> > > > >
>>> > > > > On May 20, 2016, at 8:13 AM, Shannon Quinn <squ...@gatech.edu>
>>> > wrote:
>>> > > > >
>>> > > > > Agreed, thoroughly enjoying the blog post.
>>> > > > >
>>> > > > > On 5/19/16 12:01 AM, Andrew Palumbo wrote:
>>> > > > >> Well done, Trevor!  I've not yet had a chance to try this in
>>> > zeppelin
>>> > > > > but I just read the blog which is great!
>>> > > > >>
>>> > > > >> -------- Original message --------
>>> > > > >> From: Trevor Grant <trevor.d.gr...@gmail.com>
>>> > > > >> Date: 05/18/2016 2:44 PM (GMT-05:00)
>>> > > > >> To: dev@mahout.apache.org
>>> > > > >> Subject: Re: Future Mahout - Zeppelin work
>>> > > > >>
>>> > > > >> Ah thank you.
>>> > > > >>
>>> > > > >> Fixing now.
>>> > > > >>
>>> > > > >>
>>> > > > >> Trevor Grant
>>> > > > >> Data Scientist
>>> > > > >> https://github.com/rawkintrevo
>>> > > > >> http://stackexchange.com/users/3002022/rawkintrevo
>>> > > > >> http://trevorgrant.org
>>> > > > >>
>>> > > > >> *"Fortunate is he, who is able to know the causes of things."
>>> > > -Virgil*
>>> > > > >>
>>> > > > >>
>>> > > > >> On Wed, May 18, 2016 at 1:04 PM, Andrew Palumbo <
>>> ap....@outlook.com
>>> > >
>>> > > > > wrote:
>>> > > > >>
>>> > > > >>> Hey Trevor- Just refreshed your readme.  The jar that I
>>> mentioned
>>> > is
>>> > > > >>> actually:
>>> > > > >>>
>>> > > > >>>
>>> > > > >>>
>>> > > > >
>>> > > >
>>> > >
>>> >
>>> /home/username/.m2/repository/org/apache/mahout/mahout-spark_2.10/0.12.1-SNAPSHOT/mahout-spark_2.10-0.12.1-SNAPSHOT-dependency-reduced.jar
>>> > > > >>>
>>> > > > >>> rather than:
>>> > > > >>>
>>> > > > >>>
>>> > > > >>>
>>> > > > >
>>> > > >
>>> > >
>>> >
>>> /home/username/.m2/repository/org/apache/mahout/mahout-spark-shell_2.10/0.12.1-SNAPSHOT/mahout-spark_2.10-0.12.1-SNAPSHOT-dependency-reduced.jar
>>> > > > >>>
>>> > > > >>> (In the spark module that is)
>>> > > > >>> ________________________________________
>>> > > > >>> From: Trevor Grant <trevor.d.gr...@gmail.com>
>>> > > > >>> Sent: Wednesday, May 18, 2016 11:02:43 AM
>>> > > > >>> To: dev@mahout.apache.org
>>> > > > >>> Subject: Re: Future Mahout - Zeppelin work
>>> > > > >>>
>>> > > > >>> ah yes- I remember you pointing that out to me too.
>>> > > > >>>
>>> > > > >>> I got side tracked yesterday for most of the day on an
>>> adventure in
>>> > > > > getting
>>> > > > >>> Zeppelin to work right after I accidentally updated to the new
>>> > snapshot
>>> > > > > (free
>>> > > > >>> hint: the secret was to clear my cache *face-palm*)
>>> > > > >>>
>>> > > > >>> I'm going to add that dependency to the readme.md now.
>>> > > > >>>
>>> > > > >>> thanks,
>>> > > > >>> tg
>>> > > > >>>
>>> > > > >>> Trevor Grant
>>> > > > >>> Data Scientist
>>> > > > >>> https://github.com/rawkintrevo
>>> > > > >>> http://stackexchange.com/users/3002022/rawkintrevo
>>> > > > >>> http://trevorgrant.org
>>> > > > >>>
>>> > > > >>> *"Fortunate is he, who is able to know the causes of things."
>>> > > -Virgil*
>>> > > > >>>
>>> > > > >>>
>>> > > > >>> On Wed, May 18, 2016 at 9:59 AM, Andrew Palumbo <
>>> > ap....@outlook.com>
>>> > > > >>> wrote:
>>> > > > >>>
>>> > > > >>>> Trevor this is very cool- I have not been able to look at it
>>> > closely
>>> > > > > yet
>>> > > > >>>> but just a small point: I believe that you'll also need to
>>> add the
>>> > > > >>>>
>>> > > > >>>> mahout-spark_2.10-0.12.1-SNAPSHOT-dependency-reduced.jar
>>> > > > >>>>
>>> > > > >>>> For things like the classification stats, confusion matrix,
>>> and
>>> > > > > t-digest.
>>> > > > >>>>
>>> > > > >>>> Andy
>>> > > > >>>>
>>> > > > >>>> ________________________________________
>>> > > > >>>> From: Trevor Grant <trevor.d.gr...@gmail.com>
>>> > > > >>>> Sent: Wednesday, May 18, 2016 10:47:21 AM
>>> > > > >>>> To: dev@mahout.apache.org
>>> > > > >>>> Subject: Re: Future Mahout - Zeppelin work
>>> > > > >>>>
>>> > > > >>>> I still need to update my readme/env per Pat's comments below,
>>> > > however
>>> > > > >>> with
>>> > > > >>>> out further ado, I present two notebooks that integrate
>>> Mahout +
>>> > > Spark
>>> > > > > +
>>> > > > >>>> Zeppelin + ggplot2
>>> > > > >>>>
>>> > > > >>>> https://github.com/rawkintrevo/mahout-zeppelin
>>> > > > >>>>
>>> > > > >>>> Supposing you have a somewhat recent version of Zeppelin 0.6
>>> with
>>> > > > > sparkr
>>> > > > >>>> support running already, you may import the following raw
>>> notes
>>> > > > > directly
>>> > > > >>>> into Zeppelin:
>>> > > > >>>>
>>> > > > >>>>
>>> > > > >>>>
>>> > > > >>>
>>> > > > >
>>> > > >
>>> > >
>>> >
>>> https://raw.githubusercontent.com/rawkintrevo/mahout-zeppelin/master/%5BMAHOUT%5D%5BPROVING-GROUNDS%5DLinear%20Regression%20in%20Spark.json
>>> > > > >>>>
>>> > > > >>>>
>>> > > > >>>
>>> > > > >
>>> > > >
>>> > >
>>> >
>>> https://raw.githubusercontent.com/rawkintrevo/mahout-zeppelin/master/%5BMAHOUT%5D%5BPROVING-GROUNDS%5DSpark-Mahout%2Bggplot2.json
>>> > > > >>>> So my thoughts on next steps, which I'm positing only as a
>>> starting
>>> > > > > point
>>> > > > >>>> for discussion, and are in no particular order of importance:
>>> > > > >>>>
>>> > > > >>>> - Blog on HOWTO for everyman (assumes no familiarity with
>>> Mahout,
>>> > > and
>>> > > > >>> only
>>> > > > >>>> enough familiarity with Zeppelin to have Zeppelin + SparkR
>>> > support)
>>> > > > >>>> - Some syntactic sugar somewhere in Mahout to convert a matrix
>>> > into
>>> > > a
>>> > > > > tsv
>>> > > > >>>> string. (with some sanity, eg a sample of a matrix)
>>> > > > >>>> - Figure out with Zeppelin community what deeper integration
>>> feels
>>> > > > > like -
>>> > > > >>>> e.g. build-profile vs. tutorial
>>> > > > >>>>  - I think the case for making a build-profile is that
>>> Zeppelin is
>>> > > > > first
>>> > > > >>>> and foremost a datascience tool for non technical users.
>>> > > > >>>>  - If we go that route I'll need some more support finding out
>>> > what
>>> > > is
>>> > > > >>> the
>>> > > > >>>> absolute minimum 'bare-bones' mahout we can include, e.g.
>>> does the
>>> > > > user
>>> > > > >>>> have to have mahout installed? To be discussed.
>>> > > > >>>> - Add matplotlib (python) "support" -> paragraph showing how
>>> to do
>>> > > the
>>> > > > >>> same
>>> > > > >>>> thing in Python.
>>> > > > >>>>
>>> > > > >>>> The basic deal here is we are:
>>> > > > >>>> 1) Setting up a standard Zeppelin Spark Interpreter to act
>>> like a
>>> > > > > Mahout
>>> > > > >>>> interpreter
>>> > > > >>>>    - This is taken care of by setting some env. variables,
>>> adding
>>> > > some
>>> > > > >>>> dependencies, and importing relevant packages
>>> > > > >>>> 2) do mahout things as you do
>>> > > > >>>> 3) export table to tsv string, which is passed to a resource
>>> pool
>>> > > > >>>>   - This could be done to a disk if you didn't have zeppelin
>>> > > > >>>> 4) read the tsv from the resource pool (or disk if you didn't
>>> have
>>> > > > >>>> zeppelin) in R (python soon) and create a <plot package of
>>> your
>>> > > > choice>
>>> > > > >>>>
>>> > > > >>>> To Pat's point- this is a kind of clumsy pipeline, however the
>>> > > > Zeppelin
>>> > > > >>>> wrapper at least makes it *feel* less so.
>>> > > > >>>>
>>> > > > >>>>
>>> > > > >>>> Trevor Grant
>>> > > > >>>> Data Scientist
>>> > > > >>>> https://github.com/rawkintrevo
>>> > > > >>>> http://stackexchange.com/users/3002022/rawkintrevo
>>> > > > >>>> http://trevorgrant.org
>>> > > > >>>>
>>> > > > >>>> *"Fortunate is he, who is able to know the causes of things."
>>> > > > -Virgil*
>>> > > > >>>>
>>> > > > >>>>
>>> > > > >>>> On Tue, May 17, 2016 at 1:17 PM, Pat Ferrel <
>>> > p...@occamsmachete.com>
>>> > > > >>> wrote:
>>> > > > >>>>> Seems like there is plenty to use in ggplot or python but the
>>> > > > pipeline
>>> > > > >>> is
>>> > > > >>>>> a little convoluted (so maybe no need for Angular
>>> integration).
>>> > To
>>> > > > get
>>> > > > >>>>> graphics out of Mahout it would be nice to not require
>>> knowledge
>>> > > of R
>>> > > > >>>>> and/or python. Knowing Mahout is already bad enough but I
>>> guess
>>> > the
>>> > > > > API
>>> > > > >>>>> from the Mahout side for plotting could be Scala syntactic
>>> sugar.
>>> > > > What
>>> > > > >>>> and
>>> > > > >>>>> how this all is installed and setup is the next question.
>>> > > > >>>>>
>>> > > > >>>>> BTW this is what I use elsewhere (Mahout as a lib to this
>>> code)
>>> > > > >>>>>
>>> > > > >>>>>    "spark.serializer":
>>> > > "org.apache.spark.serializer.KryoSerializer",
>>> > > > >>>>>    "spark.kryo.registrator":
>>> > > > >>>>> "org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator",
>>> > > > >>>>>    "spark.kryo.referenceTracking": "false",
>>> > > > >>>>>    "spark.kryoserializer.buffer": "300m",
>>> > > > >>>>>
>>> > > > >>>>> afaik you will only see if Kryo is working when you have to
>>> > > serialize
>>> > > > > a
>>> > > > >>>>> mahout specific data type like vector of drm, something
>>> > registered
>>> > > > > with
>>> > > > >>>>> Kryo.
>>> > > > >>>>>
>>> > > > >>>>>
>>> > > > >>>>> On May 16, 2016, at 6:18 PM, Trevor Grant <
>>> > > trevor.d.gr...@gmail.com>
>>> > > > >>>>> wrote:
>>> > > > >>>>>
>>> > > > >>>>> As a quick recap- we're trying to leverage Zeppelin for
>>> charting.
>>> > > > >>>>>
>>> > > > >>>>> It seems as though this can be achieved by
>>> > > > >>>>> - Adding properties to the Spark Interpreter
>>> > > > >>>>> - Adding dependency jars to the spark interpreter
>>> > > > >>>>> - importing in a spark paragraph
>>> > > > >>>>>
>>> > > > >>>>> All seems to be working well, but I've fooled myself into
>>> > thinking
>>> > > > >>> things
>>> > > > >>>>> were 'working' before because I wasn't actually integrating.
>>> > Lower
>>> > > I
>>> > > > >>> will
>>> > > > >>>>> outline the imports/properties, please look over and tell me
>>> if
>>> > I'm
>>> > > > >>>>> theoretically missing anything.
>>> > > > >>>>>
>>> > > > >>>>> The next phase for me will be
>>> > > > >>>>> 1) Convert a matrix to some sort of serializable object that
>>> I
>>> > can
>>> > > > >>> easily
>>> > > > >>>>> unpack from R
>>> > > > >>>>> 2) use Zeppelin's resource buffers to pass the object
>>> > > > >>>>> 3) collect the object in an R paragraph, convert it to a
>>> > dataframe
>>> > > > > then
>>> > > > >>>> map
>>> > > > >>>>> using ggplot
>>> > > > >>>>>
>>> > > > >>>>> Once I have a working prototype I will work to add some
>>> syntactic
>>> > > sugar
>>> > > > > to
>>> > > > >>>>> prepare the matrix from the scala side and pass to zeppelin
>>> > (using
>>> > > > >>>> resource
>>> > > > >>>>> pools so the same functionality can be reused in Flink) and
>>> an R
>>> > > > >>> library
>>> > > > >>>>> containing some functions which will pull the data out of the
>>> > > > resource
>>> > > > >>>> pool
>>> > > > >>>>> and spit out a dataframe.
>>> > > > >>>>>
>>> > > > >>>>> Once it's in a Dataframe in R- go nuts with any plotting
>>> package
>>> > you
>>> > > > >>> like.
>>> > > > >>>>> Likewise, it should be possible to do the same thing with
>>> > > matplotlib
>>> > > > >>> and
>>> > > > >>>>> python (
>>> > https://gist.github.com/andershammar/9070e0f6916a0fbda7a5)
>>> > > > >>>>>
>>> > > > >>>>> All of this doesn't necessarily require any changing of the
>>> > > Zeppelin
>>> > > > >>>> source
>>> > > > >>>>> code, and isn't very intrusive or difficult to set up, I'll
>>> make
>>> > a
>>> > > > > blog
>>> > > > >>>>> post but it's almost a textbook entry tutorial on using
>>> imports
>>> > in
>>> > > > >>>>> Zeppelin. (e.g. a tutorial would be just as at home on the
>>> > Zeppelin
>>> > > > >>> site
>>> > > > >>>> as
>>> > > > >>>>> it would on the Mahout site).
>>> > > > >>>>>
>>> > > > >>>>> Now, there has been some talk of using Zeppelin's angularJS.
>>> > > Things
>>> > > > >>> get
>>> > > > >>>> a
>>> > > > >>>>> little more hairy in that case, but we could make an optional
>>> > build
>>> > > > >>>> profile
>>> > > > >>>>> that would make zeppelin recognize matrices as tables and
>>> expose
>>> > > all
>>> > > > > of
>>> > > > >>>> the
>>> > > > >>>>> built in charting features of Zeppelin.
>>> > > > >>>>>
>>> > > > >>>>> If you're not adding a bunch of custom charts to Zeppelin
>>> (which
>>> > > > would
>>> > > > >>> be
>>> > > > >>>>> somewhat tedious), you're going to end up with a lot of
>>> examples
>>> > > > where
>>> > > > >>>> you
>>> > > > >>>>> create a table in Mahout/Spark pass it to AngularJS then some
>>> > > > > AngularJS
>>> > > > >>>>> code charts it for you.  At that point however, you're doing
>>> just
>>> > > as
>>> > > > >>> much
>>> > > > >>>>> work, if not more than it would be to simply pass to R or
>>> Python
>>> > > and
>>> > > > >>> let
>>> > > > >>>>> ggplot or matplotlib do the work for you.
>>> > > > >>>>>
>>> > > > >>>>> Finally, I haven't run into any errors yet using Kryo (which
>>> in
>>> > > part
>>> > > > > is
>>> > > > >>>>> what makes me fear I'm not doing this right... it was too
>>> > easy...)
>>> > > If
>>> > > > >>>>> anything seems redundant or missing, please call it out.
>>> > > > >>>>>
>>> > > > >>>>> Add Properties to Spark interp:
>>> > > > >>>>>
>>> > > > >>>>> spark.kryo.registrator
>>> > > > >>>>> org.apache.mahout.sparkbindings.io.MahoutKryoRegistrator
>>> > > > >>>>> spark.serializer org.apache.spark.serializer.KryoSerializer
>>> > > > >>>>>
>>> > > > >>>>> Add artifacts (need to change these to maven not local, also
>>> need
>>> > > to
>>> > > > >>>>> add/change one jar per below, however this does run):
>>> > > > >>>>>
>>> > > > >>>>>
>>> > > > >>>>>
>>> > > > >>>
>>> > > > >
>>> > > >
>>> > >
>>> >
>>> /home/trevor/.m2/repository/org/apache/mahout/mahout-math/0.12.1-SNAPSHOT/mahout-math-0.12.1-SNAPSHOT.jar
>>> > > > >>>>>
>>> > > > >>>
>>> > > > >
>>> > > >
>>> > >
>>> >
>>> /home/trevor/.m2/repository/org/apache/mahout/mahout-math-scala_2.10/0.12.1-SNAPSHOT/mahout-math-scala_2.10-0.12.1-SNAPSHOT.jar
>>> > > > >>>>>
>>> > > > >>>
>>> > > > >
>>> > > >
>>> > >
>>> >
>>> /home/trevor/.m2/repository/org/apache/mahout/mahout-spark_2.10/0.12.1-SNAPSHOT/mahout-spark_2.10-0.12.1-SNAPSHOT.jar
>>> > > > >>>>>
>>> > > > >>>
>>> > > > >
>>> > > >
>>> > >
>>> >
>>> /home/trevor/.m2/repository/org/apache/mahout/mahout-spark-shell_2.10/0.12.1-SNAPSHOT/mahout-spark-shell_2.10-0.12.1-SNAPSHOT.jar
>>> > > > >>>>> Add following code to first paragraph of notebook:
>>> > > > >>>>> ```
>>> > > > >>>>> %spark
>>> > > > >>>>> import org.apache.mahout.math._
>>> > > > >>>>> import org.apache.mahout.math.scalabindings._
>>> > > > >>>>> import org.apache.mahout.math.drm._
>>> > > > >>>>> import org.apache.mahout.math.scalabindings.RLikeOps._
>>> > > > >>>>> import org.apache.mahout.math.drm.RLikeDrmOps._
>>> > > > >>>>> import org.apache.mahout.sparkbindings._
>>> > > > >>>>>
>>> > > > >>>>> implicit val sdc:
>>> > > > >>>> org.apache.mahout.sparkbindings.SparkDistributedContext =
>>> > > > >>>>> sc2sdc(sc)
>>> > > > >>>>> ```
>>> > > > >>>>>
>>> > > > >>>>>
>>> > > > >>>>>
>>> > > > >>>>> Trevor Grant
>>> > > > >>>>> Data Scientist
>>> > > > >>>>> https://github.com/rawkintrevo
>>> > > > >>>>> http://stackexchange.com/users/3002022/rawkintrevo
>>> > > > >>>>> http://trevorgrant.org
>>> > > > >>>>>
>>> > > > >>>>> *"Fortunate is he, who is able to know the causes of things."
>>> > > > > -Virgil*
>>> > > > >>>>>
>>> > > > >>>>>
>>> > > > >>>>> On Mon, May 16, 2016 at 6:42 PM, Pat Ferrel <
>>> > p...@occamsmachete.com
>>> > > >
>>> > > > >>>> wrote:
>>> > > > >>>>>> Creating an mc used to do some Kryo setup, like registering
>>> > > > >>> serializers
>>> > > > >>>>> or
>>> > > > >>>>>> serializer factories IIRC. Also there is the Spark conf for
>>> > > > >>> allocating
>>> > > > >>>>>> memory for the Kryo buffer. Look at the code in the mc
>>> creation
>>> > > code
>>> > > > >>> in
>>> > > > >>>>> the
>>> > > > >>>>>> Spark package helpers. All can be done in straight Spark and
>>> > > passed
>>> > > > >>> in
>>> > > > >>>> to
>>> > > > >>>>>> create the mc when needed. Again from old weak brain cells
>>> but I
>>> > > > >>> think
>>> > > > >>>>> that
>>> > > > >>>>>> is part of what makes the Mahout shell different than the
>>> Spark
>>> > > > shell
>>> > > > >>>>> plus
>>> > > > >>>>>> imports, it auto-creates the mc instead of or along with an
>>> sc.
>>> > > > >>>>>>
>>> > > > >>>>>> When I get back to my computer I can check.
>>> > > > >>>>>>
>>> > > > >>>>>> On May 16, 2016, at 3:40 PM, Andrew Palumbo <
>>> ap....@outlook.com
>>> > >
>>> > > > >>>> wrote:
>>> > > > >>>>>> Trevor,
>>> > > > >>>>>>
>>> > > > >>>>>> Could you post any kryo errors that you may be having?
>>> > > > >>>>>>
>>> > > > >>>>>> ________________________________
>>> > > > >>>>>> From: Andrew Palumbo <ap....@outlook.com>
>>> > > > >>>>>> Sent: Monday, May 16, 2016 6:25:07 PM
>>> > > > >>>>>> To: mahout
>>> > > > >>>>>> Subject: Future Mahout - Zeppelin work
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> To Dmitriy's point, I agree ggplot is def the priority,  The
>>> > > mahout
>>> > > > >>>> plots
>>> > > > >>>>>> at this point are really just a POC, but at some point
>>> we
>>> > may
>>> > > > >>>> want
>>> > > > >>>>>> to integrate some data transformation features into the
>>> mahout
>>> > > plots
>>> > > > >>>>>> classes so they're really more future work.
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> long story short:
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>>> OK. I'll read through the examples and try to do something
>>> with
>>> > > > some
>>> > > > >>>>>> data, then do a ggplot and/or an angular plot on it
>>> (probably
>>> > > > >>> ggplot).
>>> > > > >>>>>>> I'll do a quick tutorial. Then I'll reopen discussion on
>>> that
>>> > > > >>> Zeppelin
>>> > > > >>>>>> issue about whether we want to go ahead and add another
>>> > > interpreter.
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> Sounds Great.
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> Thank you.
>>> > > > >>>>>>
>>> > > > >>>>>> ________________________________
>>> > > > >>>>>> From: Trevor Grant <trevor.d.gr...@gmail.com>
>>> > > > >>>>>> Sent: Monday, May 16, 2016 5:49:17 PM
>>> > > > >>>>>> To: Dmitriy Lyubimov
>>> > > > >>>>>> Cc: Andrew Palumbo; Pat Ferrel; Suneel Marthi
>>> > > > >>>>>> Subject: Re: Intro - Future Mahout - Zeppelin work
>>> > > > >>>>>>
>>> > > > >>>>>> I just signed up for dev, should i just reply all and cc
>>> dev or
>>> > > > >>> start a
>>> > > > >>>>>> new thread?
>>> > > > >>>>>>
>>> > > > >>>>>> Trevor Grant
>>> > > > >>>>>> Data Scientist
>>> > > > >>>>>> https://github.com/rawkintrevo
>>> > > > >>>>>> [https://avatars3.githubusercontent.com/u/5852441?v=3&s=400
>>> ]<
>>> > > > >>>>>> https://github.com/rawkintrevo>
>>> > > > >>>>>>
>>> > > > >>>>>> rawkintrevo (Trevor Grant) · GitHub<
>>> > > https://github.com/rawkintrevo>
>>> > > > >>>>>> github.com
>>> > > > >>>>>> rawkintrevo has 12 repositories written in Python,
>>> Batchfile,
>>> > and
>>> > > R.
>>> > > > >>>>>> Follow their code on GitHub.
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> http://stackexchange.com/users/3002022/rawkintrevo
>>> > > > >>>>>> http://trevorgrant.org
>>> > > > >>>>>>
>>> > > > >>>>>> "Fortunate is he, who is able to know the causes of things."
>>> > > > -Virgil
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> On Mon, May 16, 2016 at 4:46 PM, Dmitriy Lyubimov <
>>> > > > dlie...@gmail.com
>>> > > > >>>>>> <mailto:dlie...@gmail.com>> wrote:
>>> > > > >>>>>> fwiw ggplot2 is pretty darn advanced:) i am a bit skeptical
>>> > smile
>>> > > > >>> would
>>> > > > >>>>>> have something that ggplot2 would not, the other way around
>>> is
>>> > > much
>>> > > > >>>> more
>>> > > > >>>>>> expected by me:)
>>> > > > >>>>>>
>>> > > > >>>>>> anyhow if ggplot2 and matplotlib are available in Zeppelin
>>> > without
>>> > > > >>>> major
>>> > > > >>>>>> limitations, it sounds like Zeppelin should be an all around
>>> > very
>>> > > > >>> nice
>>> > > > >>>>>> venue then.
>>> > > > >>>>>>
>>> > > > >>>>>> On Mon, May 16, 2016 at 2:42 PM, Andrew Palumbo <
>>> > > ap....@outlook.com
>>> > > > >>>>>> <mailto:ap....@outlook.com>> wrote:
>>> > > > >>>>>>
>>> > > > >>>>>> yeah we should probably move this over to dev@
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> sorry- answering a question from a couple emails back on the
>>> > > thread.
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> If possible,  I think it would be great to eventually have
>>> both
>>> > > > >>> (native
>>> > > > >>>>>> mahout/smile plots and ggplot), since in the future we're
>>> going
>>> > to
>>> > > > be
>>> > > > >>>>>> adding more visualization features rather than simple
>>> scatter
>>> > > plots
>>> > > > >>> etc
>>> > > > >>>>>> that may not be covered by ggplot.
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> That's why we were thinking about using angular and the
>>> pngs.
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> But what you're saying in your last email would be great!
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> Thank you!
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> ________________________________
>>> > > > >>>>>> From: Trevor Grant <trevor.d.gr...@gmail.com<mailto:
>>> > > > >>>>>> trevor.d.gr...@gmail.com>>
>>> > > > >>>>>> Sent: Monday, May 16, 2016 5:33:12 PM
>>> > > > >>>>>> To: Andrew Palumbo
>>> > > > >>>>>> Cc: Pat Ferrel; Suneel Marthi; Dmitriy Lyubimov
>>> > > > >>>>>>
>>> > > > >>>>>> Subject: Re: Intro - Future Mahout - Zeppelin work
>>> > > > >>>>>>
>>> > > > >>>>>> I somehow replied to your last email without seeing it...
>>> > > > >>>>>>
>>> > > > >>>>>> OK. I'll read through the examples and try to do something
>>> with
>>> > > some
>>> > > > >>>>> data,
>>> > > > >>>>>> then do a ggplot and/or an angular plot on it (probably
>>> ggplot).
>>> > > > >>>>>>
>>> > > > >>>>>> I'll do a quick tutorial. Then I'll reopen discussion on
>>> that
>>> > > > >>> Zeppelin
>>> > > > >>>>>> issue about whether we want to go ahead and add another
>>> > > interpreter.
>>> > > > >>>>>>
>>> > > > >>>>>> Trevor Grant
>>> > > > >>>>>> Data Scientist
>>> > > > >>>>>> https://github.com/rawkintrevo
>>> > > > >>>>>> http://stackexchange.com/users/3002022/rawkintrevo
>>> > > > >>>>>> http://trevorgrant.org
>>> > > > >>>>>>
>>> > > > >>>>>> "Fortunate is he, who is able to know the causes of things."
>>> > > > -Virgil
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> On Mon, May 16, 2016 at 4:26 PM, Trevor Grant <
>>> > > > >>>> trevor.d.gr...@gmail.com
>>> > > > >>>>>> <mailto:trevor.d.gr...@gmail.com>> wrote:
>>> > > > >>>>>> sorry for double email but are you thinking visualization
>>> should
>>> > > be
>>> > > > a
>>> > > > >>>>>> library internal to mahout or should we leverage zeppelins
>>> > > > >>>> visualization
>>> > > > >>>>>> capabilities?
>>> > > > >>>>>>
>>> > > > >>>>>> Also, should we move this discussion to dev?
>>> > > > >>>>>>
>>> > > > >>>>>> tg
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> Trevor Grant
>>> > > > >>>>>> Data Scientist
>>> > > > >>>>>> https://github.com/rawkintrevo
>>> > > > >>>>>> http://stackexchange.com/users/3002022/rawkintrevo
>>> > > > >>>>>> http://trevorgrant.org
>>> > > > >>>>>>
>>> > > > >>>>>> "Fortunate is he, who is able to know the causes of things."
>>> > > > -Virgil
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> On Mon, May 16, 2016 at 4:14 PM, Andrew Palumbo <
>>> > > ap....@outlook.com
>>> > > > >>>>>> <mailto:ap....@outlook.com>> wrote:
>>> > > > >>>>>>
>>> > > > >>>>>> Sorry- to be a little more clear,  Part of what we're
>>> trying to do
>>> > is
>>> > > > to
>>> > > > >>>> get
>>> > > > >>>>>> the new plotting features integrated with Zeppelin. We plan
>>> on
>>> > > > adding
>>> > > > >>>>> more
>>> > > > >>>>>> advanced plotting.
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> ________________________________
>>> > > > >>>>>> From: Andrew Palumbo <ap....@outlook.com<mailto:
>>> > > ap....@outlook.com
>>> > > > >>
>>> > > > >>>>>> Sent: Monday, May 16, 2016 5:04:49 PM
>>> > > > >>>>>> To: Pat Ferrel; Trevor Grant
>>> > > > >>>>>> Cc: Suneel Marthi; Dmitriy Lyubimov
>>> > > > >>>>>> Subject: Re: Intro - Future Mahout - Zeppelin work
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> Awesome!
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> most of the hard work was done by Dmitriy[??] , I've just
>>> > reworked
>>> > > > >>> it a
>>> > > > >>>>>> couple of times to keep up with spark's refactoring.
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> I think that you will also need to include:
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>>  mahout-spark_2.10-0.12.1-SNAPSHOT-dependency-reduced.jar
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> For the new plotting features that we're working on.
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> the plotting is still a work in progress, and the grid and
>>> > surface
>>> > > > >>>> plots
>>> > > > >>>>>> are not working properly.  The plots are swing based and can
>>> > > > >>> currently
>>> > > > >>>> be
>>> > > > >>>>>> exported as  PNGs.  There are a few examples on the closed
>>> PR:
>>> > > > >>>>>> https://github.com/apache/mahout/pull/230
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> There is an example script in
>>> > examples/bin/spark-shell-plot.mscala
>>> > > > >>>>>> (committed to master) :
>>> > > > >>>>>>
>>> > > > >>>
>>> > > > >
>>> > > >
>>> > >
>>> >
>>> https://github.com/apache/mahout/blob/master/examples/bin/spark-shell-plot.mscala
>>> > > > >>>>>>
>>> > > > >>>>>> Thanks!
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> ________________________________
>>> > > > >>>>>> From: Pat Ferrel <p...@occamsmachete.com<mailto:
>>> > > > p...@occamsmachete.com
>>> > > > >>>>>> Sent: Monday, May 16, 2016 4:54:15 PM
>>> > > > >>>>>> To: Trevor Grant
>>> > > > >>>>>> Cc: Andrew Palumbo; Suneel Marthi; Dmitriy Lyubimov
>>> > > > >>>>>> Subject: Re: Intro - Future Mahout - Zeppelin work
>>> > > > >>>>>>
>>> > > > >>>>>> This is only the beginning. Andy has been using Smile as a
>>> > > > >>>> visualization
>>> > > > >>>>>> lib since it is pretty rich in ML support. We are looking at
>>> > > > >>>> integrating
>>> > > > >>>>>> some of that with Zeppelin then adding code to feed the new
>>> > > > >>>>> visualizations
>>> > > > >>>>>> in Mahout. I’m here because I’m fairly familiar with
>>> AngularJS
>>> > if
>>> > > > >>>> that’s
>>> > > > >>>>>> the way to go. Smile is swing based but can output pngs,
>>> maybe
>>> > > other
>>> > > > >>>>> image
>>> > > > >>>>>> formats—Andy?
>>> > > > >>>>>>
>>> > > > >>>>>> BTW Dmitriy is still very involved but has trouble getting
>>> > > permission
>>> > > > >>> to
>>> > > > >>>>>> donate code.
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> On May 16, 2016, at 1:45 PM, Trevor Grant <
>>> > > trevor.d.gr...@gmail.com
>>> > > > >>>>>> <mailto:trevor.d.gr...@gmail.com>> wrote:
>>> > > > >>>>>>
>>> > > > >>>>>> Hey Andrew,
>>> > > > >>>>>>
>>> > > > >>>>>> thanks- you basically did all of the hard work for me!
>>> > > > >>>>>>
>>> > > > >>>>>> I've got the linear regression example working from:
>>> > > > >>>>>>
>>> > http://mahout.apache.org/users/sparkbindings/play-with-shell.html
>>> > > > >>>>>>
>>> > > > >>>>>> my java is sketchy at best, i tend to over import. I pulled
>>> in
>>> > the
>>> > > > >>>>>> following jars:
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>
>>> > > > >
>>> > > >
>>> > >
>>> >
>>> org/apache/mahout/mahout-math/0.12.1-SNAPSHOT/mahout-math-0.12.1-SNAPSHOT.jar
>>> > > > >>>>>>
>>> > > > >>>
>>> > > > >
>>> > > >
>>> > >
>>> >
>>> org/apache/mahout/mahout-math-scala_2.10/0.12.1-SNAPSHOT/mahout-math-scala_2.10-0.12.1-SNAPSHOT.jar
>>> > > > >>>>>>
>>> > > > >>>
>>> > > > >
>>> > > >
>>> > >
>>> >
>>> org/apache/mahout/mahout-spark_2.10/0.12.1-SNAPSHOT/mahout-spark_2.10-0.12.1-SNAPSHOT.jar
>>> > > > >>>>>>
>>> > > > >>>
>>> > > > >
>>> > > >
>>> > >
>>> >
>>> org/apache/mahout/mahout-spark-shell_2.10/0.12.1-SNAPSHOT/mahout-spark-shell_2.10-0.12.1-SNAPSHOT.jar
>>> > > > >>>>>> I think those are all necessary...  should I be pulling in
>>> more?
>>> > > > >>>>>>
>>> > > > >>>>>> I hate to say it (but will do so bc this isn't public) this
>>> > > > >>> integration
>>> > > > >>>>> is
>>> > > > >>>>>> super easy from a user perspective, almost too easy- eg why
>>> not
>>> > > let
>>> > > > >>> the
>>> > > > >>>>>> user add it themselves...  Add the appropriate maven
>>> artifacts,
>>> > > > >>> restart
>>> > > > >>>>> the
>>> > > > >>>>>> interpreter and run the following in a notebook:
>>> > > > >>>>>> ```
>>> > > > >>>>>> import org.apache.mahout.math._
>>> > > > >>>>>> import org.apache.mahout.math.scalabindings._
>>> > > > >>>>>> import org.apache.mahout.math.drm._
>>> > > > >>>>>> import org.apache.mahout.math.scalabindings.RLikeOps._
>>> > > > >>>>>> import org.apache.mahout.math.drm.RLikeDrmOps._
>>> > > > >>>>>> import org.apache.mahout.sparkbindings._
>>> > > > >>>>>>
>>> > > > >>>>>> implicit val sdc:
>>> > > > >>>> org.apache.mahout.sparkbindings.SparkDistributedContext
>>> > > > >>>>>> = sc2sdc(sc)
>>> > > > >>>>>> ```
>>> > > > >>>>>> Then whatever code you want and you're off to the races...
>>> > > > >>>>>>
>>> > > > >>>>>> that said, adding a build profile like -PsparkMahout and
>>> > creating
>>> > > an
>>> > > > >>>>>> interpreter like %spark.mahout should be fairly straight
>>> > forward.
>>> > > > >>>>>>
>>> > > > >>>>>> Second question, do you have an example that would be more
>>> > > > >>>> 'visualization
>>> > > > >>>>>> friendly'? I could pass the results to Angular or R just to
>>> show
>>> > > off
>>> > > > >>>> how
>>> > > > >>>>> to
>>> > > > >>>>>> do it.
>>> > > > >>>>>>
>>> > > > >>>>>> Which leads back to the question, is this even worth
>>> building a
>>> > > full
>>> > > > >>>>>> interpreter for or just make a really nice blog post with
>>> > examples
>>> > > > on
>>> > > > >>>> how
>>> > > > >>>>>> to integrate with R...?
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> Trevor Grant
>>> > > > >>>>>> Data Scientist
>>> > > > >>>>>> https://github.com/rawkintrevo
>>> > > > >>>>>> http://stackexchange.com/users/3002022/rawkintrevo
>>> > > > >>>>>> http://trevorgrant.org<http://trevorgrant.org/>
>>> > > > >>>>>>
>>> > > > >>>>>> "Fortunate is he, who is able to know the causes of things."
>>> > > > -Virgil
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> On Mon, May 16, 2016 at 2:09 PM, Andrew Palumbo <
>>> > > ap....@outlook.com
>>> > > > >>>>>> <mailto:ap....@outlook.com>> wrote:
>>> > > > >>>>>> Hi Trevor, welcome!
>>> > > > >>>>>>
>>> > > > >>>>>> It's great to have you helping out, thanks very much.  I've
>>> > done a
>>> > > > >>> good
>>> > > > >>>>>> amount of work on our mahout spark shell .. so let me know
>>> if
>>> > you
>>> > > > >>> have
>>> > > > >>>>> any
>>> > > > >>>>>> questions there about what we did there..
>>> > > > >>>>>>
>>> > > > >>>>>> Thanks a lot!
>>> > > > >>>>>>
>>> > > > >>>>>> Andy
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> -------- Original message --------
>>> > > > >>>>>> From: Suneel Marthi <smar...@apache.org<mailto:
>>> > smar...@apache.org
>>> > > >>
>>> > > > >>>>>> Date: 05/16/2016 2:44 PM (GMT-05:00)
>>> > > > >>>>>> To: Trevor Grant <trevor.d.gr...@gmail.com<mailto:
>>> > > > >>>>> trevor.d.gr...@gmail.com
>>> > > > >>>>>> Cc: Suneel Marthi <smar...@apache.org<mailto:
>>> smar...@apache.org
>>> > > >>,
>>> > > > >>> Pat
>>> > > > >>>>>> Ferrel <p...@occamsmachete.com<mailto:p...@occamsmachete.com
>>> >>,
>>> > > > Andrew
>>> > > > >>>>>> Palumbo <ap....@outlook.com<mailto:ap....@outlook.com>>
>>> > > > >>>>>> Subject: Re: Intro - Future Mahout - Zeppelin work
>>> > > > >>>>>>
>>> > > > >>>>>> Oh yes, he's around. I see him online.
>>> > > > >>>>>>
>>> > > > >>>>>> On Mon, May 16, 2016 at 2:42 PM, Trevor Grant <
>>> > > > >>>> trevor.d.gr...@gmail.com
>>> > > > >>>>>> <mailto:trevor.d.gr...@gmail.com>> wrote:
>>> > > > >>>>>> Is Dmitriy Lyubimov still around?
>>> > > > >>>>>>
>>> > > > >>>>>> Looks like he created this issue for Zeppelin a while ago.
>>> (The
>>> > > old
>>> > > > >>>> lost
>>> > > > >>>>>> code to which you were referring?)
>>> > > > >>>>>>
>>> > > > >>>>>> https://issues.apache.org/jira/browse/ZEPPELIN-116
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> tg
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> Trevor Grant
>>> > > > >>>>>> Data Scientist
>>> > > > >>>>>> https://github.com/rawkintrevo
>>> > > > >>>>>> http://stackexchange.com/users/3002022/rawkintrevo
>>> > > > >>>>>> http://trevorgrant.org<http://trevorgrant.org/>
>>> > > > >>>>>>
>>> > > > >>>>>> "Fortunate is he, who is able to know the causes of things."
>>> > > > -Virgil
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> On Mon, May 16, 2016 at 1:37 PM, Suneel Marthi <
>>> > > smar...@apache.org
>>> > > > >>>>> <mailto:
>>> > > > >>>>>> smar...@apache.org>> wrote:
>>> > > > >>>>>> Welcome to the party TG !!
>>> > > > >>>>>>
>>> > > > >>>>>> On Mon, May 16, 2016 at 2:28 PM, Trevor Grant <
>>> > > > >>>> trevor.d.gr...@gmail.com
>>> > > > >>>>>> <mailto:trevor.d.gr...@gmail.com>> wrote:
>>> > > > >>>>>> Hey all,
>>> > > > >>>>>>
>>> > > > >>>>>> I'm excited for a chance to help out.  I'm actually getting
>>> > ready
>>> > > to
>>> > > > >>>>>> download now and start playing around.
>>> > > > >>>>>>
>>> > > > >>>>>> I had talked about this briefly but given a properly
>>> > > functioning
>>> > > > >>>>>> Zeppelin interpreter for Apache Mahout, one could leverage
>>> all
>>> > of
>>> > > > the
>>> > > > >>>>>> Zeppelin visualizations, anything in AngularJS, or anything
>>> in R
>>> > > > >>>> (through
>>> > > > >>>>>> clever use of Zeppelin's Resource Pools).
>>> > > > >>>>>>
>>> > > > >>>>>> I'll work on getting logged in to the slack channel as well.
>>> > > > >>>>>>
>>> > > > >>>>>> Nice to meet you all, looking forward to helping out!
>>> > > > >>>>>>
>>> > > > >>>>>> tg
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> Trevor Grant
>>> > > > >>>>>> Data Scientist
>>> > > > >>>>>> https://github.com/rawkintrevo
>>> > > > >>>>>> http://stackexchange.com/users/3002022/rawkintrevo
>>> > > > >>>>>> http://trevorgrant.org<http://trevorgrant.org/>
>>> > > > >>>>>>
>>> > > > >>>>>> "Fortunate is he, who is able to know the causes of things."
>>> > > > -Virgil
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> On Sun, May 15, 2016 at 12:56 PM, Suneel Marthi <
>>> > > smar...@apache.org
>>> > > > >>>>>> <mailto:smar...@apache.org>> wrote:
>>> > > > >>>>>> FYI...
>>> > > > >>>>>> Trevor was there for my talk, so he has some idea of Mahout
>>> > > Samsara.
>>> > > > >>>>>>
>>> > > > >>>>>> On Sun, May 15, 2016 at 1:51 PM, Pat Ferrel <
>>> > > p...@occamsmachete.com
>>> > > > >>>>> <mailto:
>>> > > > >>>>>> p...@occamsmachete.com>> wrote:
>>> > > > >>>>>> Hey Trevor,
>>> > > > >>>>>>
>>> > > > >>>>>> Good to meet you. As you probably know Mahout-Samsara is a
>>> > > > >>>> reincarnation
>>> > > > >>>>>> of the project in a new body, which is less a collection of
>>> > > > >>> algorithms
>>> > > > >>>>> than
>>> > > > >>>>>> a roll-your-own math/algorithm tool. The major benefit is
>>> that
>>> > > > during
>>> > > > >>>>>> experimentation and later in production the code is by
>>> nature
>>> > > > >>> scalable
>>> > > > >>>> on
>>> > > > >>>>>> Spark and Flink. Most of the Mahout DSL is R-like and
>>> supports
>>> > > > tensor
>>> > > > >>>>> math
>>> > > > >>>>>> but we are now looking at streaming online algo support too.
>>> > > > >>>>>>
>>> > > > >>>>>> In any case you probably know we have a Mahout version of
>>> the
>>> > > Spark
>>> > > > >>>>> Shell,
>>> > > > >>>>>> which has been integrated with an old version of Zeppelin
>>> (code
>>> > is
>>> > > > >>>> lost).
>>> > > > >>>>>> Recently Andy has experimented with some very nice
>>> > visualizations
>>> > > of
>>> > > > >>> ML
>>> > > > >>>>>> data (not just analytics data). We as a project are
>>> interested
>>> > in
>>> > > > >>>>> Zeppelin
>>> > > > >>>>>> integration of our shell and graphics. From what I
>>> understand
>>> > the
>>> > > > >>>>> graphics
>>> > > > >>>>>> extension mechanism of Zeppelin is based on AngularJS,
>>> which I
>>> > > have
>>> > > > >>>> some
>>> > > > >>>>>> experience with.
>>> > > > >>>>>>
>>> > > > >>>>>> So, we’d like to start the conversation about how to
>>> proceed. We
>>> > > > >>> would
>>> > > > >>>>>> love some help but will move ahead in any case.
>>> > > > >>>>>>
>>> > > > >>>>>> Pat
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>> On May 15, 2016, at 9:52 AM, Suneel Marthi <
>>> smar...@apache.org
>>> > > > >>> <mailto:
>>> > > > >>>>>> smar...@apache.org>> wrote:
>>> > > > >>>>>>
>>> > > > >>>>>> Hi Trevor,
>>> > > > >>>>>>
>>> > > > >>>>>> Nice meeting u last week in Vancouver.  Per our
>>> conversation, I
>>> > > > >>> wanted
>>> > > > >>>> to
>>> > > > >>>>>> introduce u to Andrew Palumbo (Mahout Chair) and Pat Ferrel
>>> > > (Mahout
>>> > > > >>>> PMC).
>>> > > > >>>>>> As I mentioned in my talk, we are actively looking at
>>> Zeppelin
>>> > > > >>>>> integration
>>> > > > >>>>>> with Mahout (primarily for spark) and would appreciate your
>>> help
>>> > > (as
>>> > > > >>>> also
>>> > > > >>>>>> all things DL and ML).
>>> > > > >>>>>>
>>> > > > >>>>>> We definitely can use all your help as we r revamping the
>>> Mahout
>>> > > > >>>> project
>>> > > > >>>>>> and shedding its legacy MapReduce image.
>>> > > > >>>>>>
>>> > > > >>>>>> I sent u an invite to the Mahout slack channel,
>>> > mahout.apache.org
>>> > > <
>>> > > > >>>>>> http://mahout.apache.org/> - that's where we all hangout
>>> and
>>> > not
>>> > > > >>>> having
>>> > > > >>>>>> to worry about avoiding naughty words.
>>> > > > >>>>>>
>>> > > > >>>>>> Looking forward to working with you
>>> > > > >>>>>>
>>> > > > >>>>>> Suneel
>>> > > > >>>>>>
>>> > > > >>>>>>
>>> > > > >>>>>
>>> > > > >
>>> > > > >
>>> > > > >
>>> > > >
>>> > > >
>>> > >
>>> >
>>>
>>
>>
>

Reply via email to