Re: Feedback on idiomatic clojure

2010-08-20 Thread Btsai
I believe duck-streams is deprecated since clojure 1.2.  You may want
to consider bringing back f-to-seq, which can be simplified slightly
using reader from clojure.java.io:

(ns clojure.example.anagrams
  (:use [clojure.java.io :only (reader)])
  (:gen-class))

(defn f-to-seq [file]
  (with-open [rdr (reader file)]
(doall (line-seq rdr

On Aug 19, 10:39 pm, Damon Snyder  wrote:
> Hi Meikel, Nicolas, and Justin,
> Thank you for the great feedback! I learned a lot. I was puzzled about
> (update-in (update-in)) and after doing that the -> operator makes a
> lot of sense. The reduce is clever and fits nicely as well.
>
> I dropped the function that read in the lines of the file and used
> read-lines instead. Below is what I put together after the feedback.
>
> Thanks,
> Damon
>
> PS- I was mistaken before. The first version was taking over a minute.
> This version takes about 15s.
>
> (ns clojure.example.anagrams
>   (:require clojure.contrib.duck-streams)
>   (:gen-class))
>
> (defn str-sort[string]
>   (when string
>     (apply str (sort string
>
> (defn str-to-lower[string]
>   (when string
>     (.toLowerCase string)))
>
> (defn anagram-add[anagrams akey word]
>   (if (contains? anagrams akey)
>     (-> anagrams
>       (update-in [akey :count] inc)
>       (update-in [akey :words] conj word))
>     (assoc anagrams akey {:count 1 :words [word]})))
>
> (def normalise (comp str-to-lower str-sort))
>
> (defn build-anagrams[words]
>   (reduce #(anagram-add %1 (normalise %2) %2) {} words))
>
> (defn print-anagram[v]
>   (println (str (:count (second v)) " " (first v) ": " (:words (second
> v)
>
> (defn print-anagrams[ana]
>     (doseq [v ana]
>           (print-anagram v)))
>
> (defn anagram-key[elem]
>     (- (:count (second elem
>
> ;(def *words* (f-to-seq "/usr/share/dict/web2"))
> ;(def *anagrams* (sort-by anagram-key (build-anagrams *words*)))
> ;(print-anagrams (take 10 *anagrams*))
>
> (defn -main[file]
>   (time (print-anagrams
>           (take 10
>                 (sort-by anagram-key
>                          (build-anagrams
>                            (clojure.contrib.duck-streams/read-lines
> file)))
>
> On Aug 19, 5:41 am, Meikel Brandmeyer  wrote:
>
>
>
> > Hi,
>
> > here my turn. Comments inline. Hope this helps.
>
> > Sincerely
> > Meikel
>
> > (defn f-to-seq
> >   [file]
> >   (with-open [rdr (java.io.BufferedReader.
> >                     (java.io.FileReader. file))]
> >     (doall (line-seq rdr
> > ; Yes. doall is required here. Alternatively you can wrap the whole
> > thing
> > ; in the with-open. Then you can process also larger files without
> > keeping
> > ; all data in memory. YMMV.
>
> > (defn str-sort
> >   [string]
> >   (when string
> >     (apply str (sort string
> > ; One can also skip the when here. Then nil input returns an empty
> > string.
> > ; Whether that is better depends on the usage of the return value.
> > YMMV.
> > ; (if (nil? x) x ...) is usually written as (when x ...).
>
> > (defn str-to-lower
> >   [string]
> >   (when string
> >     (.toLowerCase string)))
>
> > (defn anagram-add
> >   [anagrams akey word]
> >   (if (contains? anagrams akey)
> >     (-> anagrams
> >       (update-in [akey :count] inc)
> >       (update-in [akey :words] conj word))
> >     (assoc anagrams akey {:count 1 :words [word]})))
> > ; I would prefer vectors over lists to store stuff. You get nice
> > ; things for free, like O(1) appending, O(1) reverse, O(1) random
> > ; access, ...
>
> > (defn word
> >   [s]
> >   (first s))
> > ; or: (def word first)
> > ; not needed for my solution
>
> > (def normalise (comp str-to-lower str-sort))
>
> > (defn build-anagrams
> >   [words]
> >   (reduce #(anagram-add %1 (normalise %2) %2) {} words))
> > ; Looping with an accumulator, just returning the latter when the
> > ; input is exhausted cries for reduce.
>
> > (defn print-anagram
> >   [v]
> >   (println (str (:count (second v)) " " (first v) ": " (:words (second
> > v)
> > ; one could use destructuring to access things.
>
> > (defn print-anagram
> >   [anagram]
> >   (let [[normal-form {:keys [count words]}] anagram]
> >     (println (str count " " normal-form ": " words
>
> > (defn print-anagrams
> >   [ana]
> >   (doseq [v ana]
> >     (print-anagram v)))
> > ; more descriptive names would be good, I guess
>
> > (defn anagram-key
> >   [elem]
> >   (- (:count (second elem
> > ; the minus should take care of the reverse?
>
> > (def *words* (f-to-seq "/usr/share/dict/web2"))
> > (def *anagrams* (sort-by anagram-key (build-anagrams *words*)))
> > (print-anagrams (take 10 *anagrams*))

-- 
You received this message because you are subscribed to the Google
Groups "Clojure" group.
To post to this group, send email to clojure@googlegroups.com
Note that posts from new members are moderated - please be patient with your 
first post.
To unsubscribe from this group, send email to
clojure+unsubscr...@googlegroups.com
For more options, visit this group at
http:

Re: Feedback on idiomatic clojure

2010-08-19 Thread Damon Snyder
Hi Meikel, Nicolas, and Justin,
Thank you for the great feedback! I learned a lot. I was puzzled about
(update-in (update-in)) and after doing that the -> operator makes a
lot of sense. The reduce is clever and fits nicely as well.

I dropped the function that read in the lines of the file and used
read-lines instead. Below is what I put together after the feedback.

Thanks,
Damon

PS- I was mistaken before. The first version was taking over a minute.
This version takes about 15s.

(ns clojure.example.anagrams
  (:require clojure.contrib.duck-streams)
  (:gen-class))

(defn str-sort[string]
  (when string
(apply str (sort string


(defn str-to-lower[string]
  (when string
(.toLowerCase string)))


(defn anagram-add[anagrams akey word]
  (if (contains? anagrams akey)
(-> anagrams
  (update-in [akey :count] inc)
  (update-in [akey :words] conj word))
(assoc anagrams akey {:count 1 :words [word]})))

(def normalise (comp str-to-lower str-sort))

(defn build-anagrams[words]
  (reduce #(anagram-add %1 (normalise %2) %2) {} words))


(defn print-anagram[v]
  (println (str (:count (second v)) " " (first v) ": " (:words (second
v)

(defn print-anagrams[ana]
(doseq [v ana]
  (print-anagram v)))

(defn anagram-key[elem]
(- (:count (second elem


;(def *words* (f-to-seq "/usr/share/dict/web2"))
;(def *anagrams* (sort-by anagram-key (build-anagrams *words*)))
;(print-anagrams (take 10 *anagrams*))

(defn -main[file]
  (time (print-anagrams
  (take 10
(sort-by anagram-key
 (build-anagrams
   (clojure.contrib.duck-streams/read-lines
file)))



On Aug 19, 5:41 am, Meikel Brandmeyer  wrote:
> Hi,
>
> here my turn. Comments inline. Hope this helps.
>
> Sincerely
> Meikel
>
> (defn f-to-seq
>   [file]
>   (with-open [rdr (java.io.BufferedReader.
>                     (java.io.FileReader. file))]
>     (doall (line-seq rdr
> ; Yes. doall is required here. Alternatively you can wrap the whole
> thing
> ; in the with-open. Then you can process also larger files without
> keeping
> ; all data in memory. YMMV.
>
> (defn str-sort
>   [string]
>   (when string
>     (apply str (sort string
> ; One can also skip the when here. Then nil input returns an empty
> string.
> ; Whether that is better depends on the usage of the return value.
> YMMV.
> ; (if (nil? x) x ...) is usually written as (when x ...).
>
> (defn str-to-lower
>   [string]
>   (when string
>     (.toLowerCase string)))
>
> (defn anagram-add
>   [anagrams akey word]
>   (if (contains? anagrams akey)
>     (-> anagrams
>       (update-in [akey :count] inc)
>       (update-in [akey :words] conj word))
>     (assoc anagrams akey {:count 1 :words [word]})))
> ; I would prefer vectors over lists to store stuff. You get nice
> ; things for free, like O(1) appending, O(1) reverse, O(1) random
> ; access, ...
>
> (defn word
>   [s]
>   (first s))
> ; or: (def word first)
> ; not needed for my solution
>
> (def normalise (comp str-to-lower str-sort))
>
> (defn build-anagrams
>   [words]
>   (reduce #(anagram-add %1 (normalise %2) %2) {} words))
> ; Looping with an accumulator, just returning the latter when the
> ; input is exhausted cries for reduce.
>
> (defn print-anagram
>   [v]
>   (println (str (:count (second v)) " " (first v) ": " (:words (second
> v)
> ; one could use destructuring to access things.
>
> (defn print-anagram
>   [anagram]
>   (let [[normal-form {:keys [count words]}] anagram]
>     (println (str count " " normal-form ": " words
>
> (defn print-anagrams
>   [ana]
>   (doseq [v ana]
>     (print-anagram v)))
> ; more descriptive names would be good, I guess
>
> (defn anagram-key
>   [elem]
>   (- (:count (second elem
> ; the minus should take care of the reverse?
>
> (def *words* (f-to-seq "/usr/share/dict/web2"))
> (def *anagrams* (sort-by anagram-key (build-anagrams *words*)))
> (print-anagrams (take 10 *anagrams*))

-- 
You received this message because you are subscribed to the Google
Groups "Clojure" group.
To post to this group, send email to clojure@googlegroups.com
Note that posts from new members are moderated - please be patient with your 
first post.
To unsubscribe from this group, send email to
clojure+unsubscr...@googlegroups.com
For more options, visit this group at
http://groups.google.com/group/clojure?hl=en


Re: Feedback on idiomatic clojure

2010-08-19 Thread Meikel Brandmeyer
Hi.

Am 19.08.2010 um 22:14 schrieb Nicolas Oury:

>> in because I was getting null pointer exceptions when the string was
>> null. What is the difference between {:count 1 :words (list words)}
>> and a hash-map? I was under the impression that {} created a hash.
>> 
> I just find it easier to read because it looks like a value and not a
> function call.

Another difference is that in this case it creates not a hash map but an array 
map. Rule of thumb: up to 8 keys => array map, more than 8 => hash map. The 
array map is simply a short alist, not a full blown hash map. For such small 
maps this is faster than pulling at the hash cannon. So the difference is not 
limited to syntax alone.

Sincerely
Meikel

-- 
You received this message because you are subscribed to the Google
Groups "Clojure" group.
To post to this group, send email to clojure@googlegroups.com
Note that posts from new members are moderated - please be patient with your 
first post.
To unsubscribe from this group, send email to
clojure+unsubscr...@googlegroups.com
For more options, visit this group at
http://groups.google.com/group/clojure?hl=en


Fwd: Feedback on idiomatic clojure

2010-08-19 Thread Nicolas Oury
Damon reply to me and not the list, so I forward.

On Thu, Aug 19, 2010 at 9:09 PM, Damon Snyder  wrote:
> Hi Nicolas,
> Thanks for the suggestions. Regarding the first one: ah, I see. That
> is a nice compact way to test to see if the str is nil. I added that

I reckon that Meikel's suggestion of using when is surely better in
this situation (for a and).
But the (or something? default) or (or known? recompute?) can be useful.

> in because I was getting null pointer exceptions when the string was
> null. What is the difference between {:count 1 :words (list words)}
> and a hash-map? I was under the impression that {} created a hash.
>
I just find it easier to read because it looks like a value and not a
function call.

-- 
You received this message because you are subscribed to the Google
Groups "Clojure" group.
To post to this group, send email to clojure@googlegroups.com
Note that posts from new members are moderated - please be patient with your 
first post.
To unsubscribe from this group, send email to
clojure+unsubscr...@googlegroups.com
For more options, visit this group at
http://groups.google.com/group/clojure?hl=en


Re: Feedback on idiomatic clojure

2010-08-19 Thread Justin Kramer
Meikel's suggestions are all good and I would follow them.

There are a number of built-in functions you can take advantage of if
you're using 1.2 (they're also available in contrib for 1.1):

clojure.string/lower-case
clojure.java.io/reader -- obviates java.io.BufferedReader and friends:
(reader "/usr/share/dict/words")

As a point of comparison, I did a similar code kata recently. I've
posted it here, with a few lines added to show the top 10:

http://gist.github.com/537945

It runs in a couple seconds on my machine. For maximum speed,
transients might help.

Justin

On Aug 18, 6:46 pm, Damon Snyder  wrote:
> Hi Everyone,
> I'm looking for some feedback on a small get-your-feet wet program I
> wrote in clojure (my first). In particular, I hope to get feedback on
> how to better make use of clojure's data structures and functional
> programming in general.
>
> Below is the toy program and the sample output. It reads in words from
> the OS X dictionary file, catalogs the anagrams, and then prints out
> the top ten by frequency. Although performance is not my primary
> concern here, it is a bit slow. It takes about 15s on my mac book
> pro.
>
> As an example, is this the "proper" way to update multiple fields at
> once in a nested hash-map:
>
> (update-in (update-in anagrams [akey :count] inc) [akey :words] conj
> word)))
>
> I understand this as returning an copy of an updated copy of an
> updated copy.
>
> Any feedback would be appreciated. Thanks,
> Damon
>
> ## begin
> (defn f-to-seq[file]
>   (with-open [rdr (java.io.BufferedReader.
>                     (java.io.FileReader. file))]
>     (doall (line-seq rdr
>
> (defn str-sort[str]
>   (if (nil? str)
>     str
>   (String. (into-array (. Character TYPE) (sort str)
>
> (defn str-to-lower[str]
>   (if (nil? str)
>     str
>     (.toLowerCase str)))
>
> (defn anagram-add[anagrams akey word]
>   (if (empty? (get anagrams akey))
>     (assoc anagrams akey (hash-map :count 1, :words (list word)))
>     (update-in (update-in anagrams [akey :count] inc) [akey :words]
> conj word)))
>
> (defn word[seq] (first seq))
>
> (defn build-anagrams[seq]
>   (loop [seq seq
>          akey (str-sort (str-to-lower (word seq)))
>          anagrams {}]
>     (if (empty? seq)
>       anagrams
>       (recur (rest seq)
>              (str-sort (str-to-lower (word (rest seq
>              (anagram-add anagrams akey (word seq))
>
> (defn print-anagram[v]
>   (println (str (:count (first (rest v))) " " (first v) ": " (:words
> (first (rest v))
>
> (defn print-anagrams[ana]
>   (doseq [v ana]
>     (print-anagram v)))
>
> (defn anagram-key[elem] (:count (first (rest elem
>
> (def *words* (f-to-seq "/usr/share/dict/web2"))
> (def *anagrams* (sort-by anagram-key (build-anagrams *words*)))
> (print-anagrams (take 10 (reverse *anagrams*)))
>
> ## output
> 11 agnor: ("Ronga" "rogan" "organ" "orang" "Orang" "nagor" "groan"
> "grano" "goran" "argon" "angor")
> 10 aelps: ("speal" "spale" "slape" "sepal" "saple" "salep" "Pales"
> "Lepas" "lapse" "Elaps")
> 9 eerst: ("tsere" "terse" "stree" "stere" "steer" "reset" "reest"
> "estre" "ester")
> 9 aeerst: ("teaser" "staree" "seater" "saeter" "reseat" "Eastre"
> "easter" "Easter" "asteer")
> 9 aacinr: ("narica" "crania" "Crania" "carina" "Carian" "canari"
> "Canari" "arnica" "acinar")
> 9 acert: ("trace" "recta" "react" "creta" "creat" "crate" "cater"
> "carte" "caret")
> 8 ailr: ("rial" "rail" "lira" "liar" "lari" "Lari" "lair" "aril")
> 8 aelpt: ("tepal" "pleat" "plate" "petal" "pelta" "patel" "palet"
> "leapt")
> 8 airst: ("Trias" "tisar" "tarsi" "stria" "stair" "sitar" "astir"
> "arist")
> 8 aemrt: ("Trema" "trame" "terma" "tamer" "ramet" "metra" "mater"
> "armet")

-- 
You received this message because you are subscribed to the Google
Groups "Clojure" group.
To post to this group, send email to clojure@googlegroups.com
Note that posts from new members are moderated - please be patient with your 
first post.
To unsubscribe from this group, send email to
clojure+unsubscr...@googlegroups.com
For more options, visit this group at
http://groups.google.com/group/clojure?hl=en


Re: Feedback on idiomatic clojure

2010-08-19 Thread Meikel Brandmeyer
Hi,

here my turn. Comments inline. Hope this helps.

Sincerely
Meikel

(defn f-to-seq
  [file]
  (with-open [rdr (java.io.BufferedReader.
(java.io.FileReader. file))]
(doall (line-seq rdr
; Yes. doall is required here. Alternatively you can wrap the whole
thing
; in the with-open. Then you can process also larger files without
keeping
; all data in memory. YMMV.


(defn str-sort
  [string]
  (when string
(apply str (sort string
; One can also skip the when here. Then nil input returns an empty
string.
; Whether that is better depends on the usage of the return value.
YMMV.
; (if (nil? x) x ...) is usually written as (when x ...).

(defn str-to-lower
  [string]
  (when string
(.toLowerCase string)))

(defn anagram-add
  [anagrams akey word]
  (if (contains? anagrams akey)
(-> anagrams
  (update-in [akey :count] inc)
  (update-in [akey :words] conj word))
(assoc anagrams akey {:count 1 :words [word]})))
; I would prefer vectors over lists to store stuff. You get nice
; things for free, like O(1) appending, O(1) reverse, O(1) random
; access, ...

(defn word
  [s]
  (first s))
; or: (def word first)
; not needed for my solution

(def normalise (comp str-to-lower str-sort))

(defn build-anagrams
  [words]
  (reduce #(anagram-add %1 (normalise %2) %2) {} words))
; Looping with an accumulator, just returning the latter when the
; input is exhausted cries for reduce.

(defn print-anagram
  [v]
  (println (str (:count (second v)) " " (first v) ": " (:words (second
v)
; one could use destructuring to access things.

(defn print-anagram
  [anagram]
  (let [[normal-form {:keys [count words]}] anagram]
(println (str count " " normal-form ": " words

(defn print-anagrams
  [ana]
  (doseq [v ana]
(print-anagram v)))
; more descriptive names would be good, I guess

(defn anagram-key
  [elem]
  (- (:count (second elem
; the minus should take care of the reverse?

(def *words* (f-to-seq "/usr/share/dict/web2"))
(def *anagrams* (sort-by anagram-key (build-anagrams *words*)))
(print-anagrams (take 10 *anagrams*))

-- 
You received this message because you are subscribed to the Google
Groups "Clojure" group.
To post to this group, send email to clojure@googlegroups.com
Note that posts from new members are moderated - please be patient with your 
first post.
To unsubscribe from this group, send email to
clojure+unsubscr...@googlegroups.com
For more options, visit this group at
http://groups.google.com/group/clojure?hl=en


Re: Feedback on idiomatic clojure

2010-08-19 Thread Nicolas Oury
A few remarks:
>
> ## begin
> (defn f-to-seq[file]
>  (with-open [rdr (java.io.BufferedReader.
>                    (java.io.FileReader. file))]
>    (doall (line-seq rdr
>
Do you need the doall?

> (defn str-sort[str]
>  (if (nil? str)
>    str
>  (String. (into-array (. Character TYPE) (sort str)
>
(and str
   (String. (into-array (. Character TYPE) (sort str

(if str is nil this will return nil)


> (defn anagram-add[anagrams akey word]
>  (if (empty? (get anagrams akey))
>    (assoc anagrams akey (hash-map :count 1, :words (list word)))
>    (update-in (update-in anagrams [akey :count] inc) [akey :words]
> conj word)))
>
You could use a literal {:count 1 :words (list words)} instead of hash-map

I believe that (get anagrams akey) will give nil in case there is known.
If it is the case (or (get anagrams key) ...) will be more readable.

(update-in ...) =>

(-> anagrams
  (update-in [akey :count] inc)
  (update-in [akey :words] conj word))

I will have a look to the rest of the program later.

Best,

Nicolas.

-- 
You received this message because you are subscribed to the Google
Groups "Clojure" group.
To post to this group, send email to clojure@googlegroups.com
Note that posts from new members are moderated - please be patient with your 
first post.
To unsubscribe from this group, send email to
clojure+unsubscr...@googlegroups.com
For more options, visit this group at
http://groups.google.com/group/clojure?hl=en


Feedback on idiomatic clojure

2010-08-19 Thread Damon Snyder
Hi Everyone,
I'm looking for some feedback on a small get-your-feet wet program I
wrote in clojure (my first). In particular, I hope to get feedback on
how to better make use of clojure's data structures and functional
programming in general.

Below is the toy program and the sample output. It reads in words from
the OS X dictionary file, catalogs the anagrams, and then prints out
the top ten by frequency. Although performance is not my primary
concern here, it is a bit slow. It takes about 15s on my mac book
pro.

As an example, is this the "proper" way to update multiple fields at
once in a nested hash-map:

(update-in (update-in anagrams [akey :count] inc) [akey :words] conj
word)))

I understand this as returning an copy of an updated copy of an
updated copy.

Any feedback would be appreciated. Thanks,
Damon

## begin
(defn f-to-seq[file]
  (with-open [rdr (java.io.BufferedReader.
(java.io.FileReader. file))]
(doall (line-seq rdr

(defn str-sort[str]
  (if (nil? str)
str
  (String. (into-array (. Character TYPE) (sort str)

(defn str-to-lower[str]
  (if (nil? str)
str
(.toLowerCase str)))


(defn anagram-add[anagrams akey word]
  (if (empty? (get anagrams akey))
(assoc anagrams akey (hash-map :count 1, :words (list word)))
(update-in (update-in anagrams [akey :count] inc) [akey :words]
conj word)))

(defn word[seq] (first seq))

(defn build-anagrams[seq]
  (loop [seq seq
 akey (str-sort (str-to-lower (word seq)))
 anagrams {}]
(if (empty? seq)
  anagrams
  (recur (rest seq)
 (str-sort (str-to-lower (word (rest seq
 (anagram-add anagrams akey (word seq))


(defn print-anagram[v]
  (println (str (:count (first (rest v))) " " (first v) ": " (:words
(first (rest v))

(defn print-anagrams[ana]
  (doseq [v ana]
(print-anagram v)))

(defn anagram-key[elem] (:count (first (rest elem

(def *words* (f-to-seq "/usr/share/dict/web2"))
(def *anagrams* (sort-by anagram-key (build-anagrams *words*)))
(print-anagrams (take 10 (reverse *anagrams*)))

## output
11 agnor: ("Ronga" "rogan" "organ" "orang" "Orang" "nagor" "groan"
"grano" "goran" "argon" "angor")
10 aelps: ("speal" "spale" "slape" "sepal" "saple" "salep" "Pales"
"Lepas" "lapse" "Elaps")
9 eerst: ("tsere" "terse" "stree" "stere" "steer" "reset" "reest"
"estre" "ester")
9 aeerst: ("teaser" "staree" "seater" "saeter" "reseat" "Eastre"
"easter" "Easter" "asteer")
9 aacinr: ("narica" "crania" "Crania" "carina" "Carian" "canari"
"Canari" "arnica" "acinar")
9 acert: ("trace" "recta" "react" "creta" "creat" "crate" "cater"
"carte" "caret")
8 ailr: ("rial" "rail" "lira" "liar" "lari" "Lari" "lair" "aril")
8 aelpt: ("tepal" "pleat" "plate" "petal" "pelta" "patel" "palet"
"leapt")
8 airst: ("Trias" "tisar" "tarsi" "stria" "stair" "sitar" "astir"
"arist")
8 aemrt: ("Trema" "trame" "terma" "tamer" "ramet" "metra" "mater"
"armet")

-- 
You received this message because you are subscribed to the Google
Groups "Clojure" group.
To post to this group, send email to clojure@googlegroups.com
Note that posts from new members are moderated - please be patient with your 
first post.
To unsubscribe from this group, send email to
clojure+unsubscr...@googlegroups.com
For more options, visit this group at
http://groups.google.com/group/clojure?hl=en