Re: Feedback on idiomatic clojure
I believe duck-streams is deprecated since clojure 1.2. You may want to consider bringing back f-to-seq, which can be simplified slightly using reader from clojure.java.io: (ns clojure.example.anagrams (:use [clojure.java.io :only (reader)]) (:gen-class)) (defn f-to-seq [file] (with-open [rdr (reader file)] (doall (line-seq rdr On Aug 19, 10:39 pm, Damon Snyder wrote: > Hi Meikel, Nicolas, and Justin, > Thank you for the great feedback! I learned a lot. I was puzzled about > (update-in (update-in)) and after doing that the -> operator makes a > lot of sense. The reduce is clever and fits nicely as well. > > I dropped the function that read in the lines of the file and used > read-lines instead. Below is what I put together after the feedback. > > Thanks, > Damon > > PS- I was mistaken before. The first version was taking over a minute. > This version takes about 15s. > > (ns clojure.example.anagrams > (:require clojure.contrib.duck-streams) > (:gen-class)) > > (defn str-sort[string] > (when string > (apply str (sort string > > (defn str-to-lower[string] > (when string > (.toLowerCase string))) > > (defn anagram-add[anagrams akey word] > (if (contains? anagrams akey) > (-> anagrams > (update-in [akey :count] inc) > (update-in [akey :words] conj word)) > (assoc anagrams akey {:count 1 :words [word]}))) > > (def normalise (comp str-to-lower str-sort)) > > (defn build-anagrams[words] > (reduce #(anagram-add %1 (normalise %2) %2) {} words)) > > (defn print-anagram[v] > (println (str (:count (second v)) " " (first v) ": " (:words (second > v) > > (defn print-anagrams[ana] > (doseq [v ana] > (print-anagram v))) > > (defn anagram-key[elem] > (- (:count (second elem > > ;(def *words* (f-to-seq "/usr/share/dict/web2")) > ;(def *anagrams* (sort-by anagram-key (build-anagrams *words*))) > ;(print-anagrams (take 10 *anagrams*)) > > (defn -main[file] > (time (print-anagrams > (take 10 > (sort-by anagram-key > (build-anagrams > (clojure.contrib.duck-streams/read-lines > file))) > > On Aug 19, 5:41 am, Meikel Brandmeyer wrote: > > > > > Hi, > > > here my turn. Comments inline. Hope this helps. > > > Sincerely > > Meikel > > > (defn f-to-seq > > [file] > > (with-open [rdr (java.io.BufferedReader. > > (java.io.FileReader. file))] > > (doall (line-seq rdr > > ; Yes. doall is required here. Alternatively you can wrap the whole > > thing > > ; in the with-open. Then you can process also larger files without > > keeping > > ; all data in memory. YMMV. > > > (defn str-sort > > [string] > > (when string > > (apply str (sort string > > ; One can also skip the when here. Then nil input returns an empty > > string. > > ; Whether that is better depends on the usage of the return value. > > YMMV. > > ; (if (nil? x) x ...) is usually written as (when x ...). > > > (defn str-to-lower > > [string] > > (when string > > (.toLowerCase string))) > > > (defn anagram-add > > [anagrams akey word] > > (if (contains? anagrams akey) > > (-> anagrams > > (update-in [akey :count] inc) > > (update-in [akey :words] conj word)) > > (assoc anagrams akey {:count 1 :words [word]}))) > > ; I would prefer vectors over lists to store stuff. You get nice > > ; things for free, like O(1) appending, O(1) reverse, O(1) random > > ; access, ... > > > (defn word > > [s] > > (first s)) > > ; or: (def word first) > > ; not needed for my solution > > > (def normalise (comp str-to-lower str-sort)) > > > (defn build-anagrams > > [words] > > (reduce #(anagram-add %1 (normalise %2) %2) {} words)) > > ; Looping with an accumulator, just returning the latter when the > > ; input is exhausted cries for reduce. > > > (defn print-anagram > > [v] > > (println (str (:count (second v)) " " (first v) ": " (:words (second > > v) > > ; one could use destructuring to access things. > > > (defn print-anagram > > [anagram] > > (let [[normal-form {:keys [count words]}] anagram] > > (println (str count " " normal-form ": " words > > > (defn print-anagrams > > [ana] > > (doseq [v ana] > > (print-anagram v))) > > ; more descriptive names would be good, I guess > > > (defn anagram-key > > [elem] > > (- (:count (second elem > > ; the minus should take care of the reverse? > > > (def *words* (f-to-seq "/usr/share/dict/web2")) > > (def *anagrams* (sort-by anagram-key (build-anagrams *words*))) > > (print-anagrams (take 10 *anagrams*)) -- You received this message because you are subscribed to the Google Groups "Clojure" group. To post to this group, send email to clojure@googlegroups.com Note that posts from new members are moderated - please be patient with your first post. To unsubscribe from this group, send email to clojure+unsubscr...@googlegroups.com For more options, visit this group at http:
Re: Feedback on idiomatic clojure
Hi Meikel, Nicolas, and Justin, Thank you for the great feedback! I learned a lot. I was puzzled about (update-in (update-in)) and after doing that the -> operator makes a lot of sense. The reduce is clever and fits nicely as well. I dropped the function that read in the lines of the file and used read-lines instead. Below is what I put together after the feedback. Thanks, Damon PS- I was mistaken before. The first version was taking over a minute. This version takes about 15s. (ns clojure.example.anagrams (:require clojure.contrib.duck-streams) (:gen-class)) (defn str-sort[string] (when string (apply str (sort string (defn str-to-lower[string] (when string (.toLowerCase string))) (defn anagram-add[anagrams akey word] (if (contains? anagrams akey) (-> anagrams (update-in [akey :count] inc) (update-in [akey :words] conj word)) (assoc anagrams akey {:count 1 :words [word]}))) (def normalise (comp str-to-lower str-sort)) (defn build-anagrams[words] (reduce #(anagram-add %1 (normalise %2) %2) {} words)) (defn print-anagram[v] (println (str (:count (second v)) " " (first v) ": " (:words (second v) (defn print-anagrams[ana] (doseq [v ana] (print-anagram v))) (defn anagram-key[elem] (- (:count (second elem ;(def *words* (f-to-seq "/usr/share/dict/web2")) ;(def *anagrams* (sort-by anagram-key (build-anagrams *words*))) ;(print-anagrams (take 10 *anagrams*)) (defn -main[file] (time (print-anagrams (take 10 (sort-by anagram-key (build-anagrams (clojure.contrib.duck-streams/read-lines file))) On Aug 19, 5:41 am, Meikel Brandmeyer wrote: > Hi, > > here my turn. Comments inline. Hope this helps. > > Sincerely > Meikel > > (defn f-to-seq > [file] > (with-open [rdr (java.io.BufferedReader. > (java.io.FileReader. file))] > (doall (line-seq rdr > ; Yes. doall is required here. Alternatively you can wrap the whole > thing > ; in the with-open. Then you can process also larger files without > keeping > ; all data in memory. YMMV. > > (defn str-sort > [string] > (when string > (apply str (sort string > ; One can also skip the when here. Then nil input returns an empty > string. > ; Whether that is better depends on the usage of the return value. > YMMV. > ; (if (nil? x) x ...) is usually written as (when x ...). > > (defn str-to-lower > [string] > (when string > (.toLowerCase string))) > > (defn anagram-add > [anagrams akey word] > (if (contains? anagrams akey) > (-> anagrams > (update-in [akey :count] inc) > (update-in [akey :words] conj word)) > (assoc anagrams akey {:count 1 :words [word]}))) > ; I would prefer vectors over lists to store stuff. You get nice > ; things for free, like O(1) appending, O(1) reverse, O(1) random > ; access, ... > > (defn word > [s] > (first s)) > ; or: (def word first) > ; not needed for my solution > > (def normalise (comp str-to-lower str-sort)) > > (defn build-anagrams > [words] > (reduce #(anagram-add %1 (normalise %2) %2) {} words)) > ; Looping with an accumulator, just returning the latter when the > ; input is exhausted cries for reduce. > > (defn print-anagram > [v] > (println (str (:count (second v)) " " (first v) ": " (:words (second > v) > ; one could use destructuring to access things. > > (defn print-anagram > [anagram] > (let [[normal-form {:keys [count words]}] anagram] > (println (str count " " normal-form ": " words > > (defn print-anagrams > [ana] > (doseq [v ana] > (print-anagram v))) > ; more descriptive names would be good, I guess > > (defn anagram-key > [elem] > (- (:count (second elem > ; the minus should take care of the reverse? > > (def *words* (f-to-seq "/usr/share/dict/web2")) > (def *anagrams* (sort-by anagram-key (build-anagrams *words*))) > (print-anagrams (take 10 *anagrams*)) -- You received this message because you are subscribed to the Google Groups "Clojure" group. To post to this group, send email to clojure@googlegroups.com Note that posts from new members are moderated - please be patient with your first post. To unsubscribe from this group, send email to clojure+unsubscr...@googlegroups.com For more options, visit this group at http://groups.google.com/group/clojure?hl=en
Re: Feedback on idiomatic clojure
Hi. Am 19.08.2010 um 22:14 schrieb Nicolas Oury: >> in because I was getting null pointer exceptions when the string was >> null. What is the difference between {:count 1 :words (list words)} >> and a hash-map? I was under the impression that {} created a hash. >> > I just find it easier to read because it looks like a value and not a > function call. Another difference is that in this case it creates not a hash map but an array map. Rule of thumb: up to 8 keys => array map, more than 8 => hash map. The array map is simply a short alist, not a full blown hash map. For such small maps this is faster than pulling at the hash cannon. So the difference is not limited to syntax alone. Sincerely Meikel -- You received this message because you are subscribed to the Google Groups "Clojure" group. To post to this group, send email to clojure@googlegroups.com Note that posts from new members are moderated - please be patient with your first post. To unsubscribe from this group, send email to clojure+unsubscr...@googlegroups.com For more options, visit this group at http://groups.google.com/group/clojure?hl=en
Fwd: Feedback on idiomatic clojure
Damon reply to me and not the list, so I forward. On Thu, Aug 19, 2010 at 9:09 PM, Damon Snyder wrote: > Hi Nicolas, > Thanks for the suggestions. Regarding the first one: ah, I see. That > is a nice compact way to test to see if the str is nil. I added that I reckon that Meikel's suggestion of using when is surely better in this situation (for a and). But the (or something? default) or (or known? recompute?) can be useful. > in because I was getting null pointer exceptions when the string was > null. What is the difference between {:count 1 :words (list words)} > and a hash-map? I was under the impression that {} created a hash. > I just find it easier to read because it looks like a value and not a function call. -- You received this message because you are subscribed to the Google Groups "Clojure" group. To post to this group, send email to clojure@googlegroups.com Note that posts from new members are moderated - please be patient with your first post. To unsubscribe from this group, send email to clojure+unsubscr...@googlegroups.com For more options, visit this group at http://groups.google.com/group/clojure?hl=en
Re: Feedback on idiomatic clojure
Meikel's suggestions are all good and I would follow them. There are a number of built-in functions you can take advantage of if you're using 1.2 (they're also available in contrib for 1.1): clojure.string/lower-case clojure.java.io/reader -- obviates java.io.BufferedReader and friends: (reader "/usr/share/dict/words") As a point of comparison, I did a similar code kata recently. I've posted it here, with a few lines added to show the top 10: http://gist.github.com/537945 It runs in a couple seconds on my machine. For maximum speed, transients might help. Justin On Aug 18, 6:46 pm, Damon Snyder wrote: > Hi Everyone, > I'm looking for some feedback on a small get-your-feet wet program I > wrote in clojure (my first). In particular, I hope to get feedback on > how to better make use of clojure's data structures and functional > programming in general. > > Below is the toy program and the sample output. It reads in words from > the OS X dictionary file, catalogs the anagrams, and then prints out > the top ten by frequency. Although performance is not my primary > concern here, it is a bit slow. It takes about 15s on my mac book > pro. > > As an example, is this the "proper" way to update multiple fields at > once in a nested hash-map: > > (update-in (update-in anagrams [akey :count] inc) [akey :words] conj > word))) > > I understand this as returning an copy of an updated copy of an > updated copy. > > Any feedback would be appreciated. Thanks, > Damon > > ## begin > (defn f-to-seq[file] > (with-open [rdr (java.io.BufferedReader. > (java.io.FileReader. file))] > (doall (line-seq rdr > > (defn str-sort[str] > (if (nil? str) > str > (String. (into-array (. Character TYPE) (sort str) > > (defn str-to-lower[str] > (if (nil? str) > str > (.toLowerCase str))) > > (defn anagram-add[anagrams akey word] > (if (empty? (get anagrams akey)) > (assoc anagrams akey (hash-map :count 1, :words (list word))) > (update-in (update-in anagrams [akey :count] inc) [akey :words] > conj word))) > > (defn word[seq] (first seq)) > > (defn build-anagrams[seq] > (loop [seq seq > akey (str-sort (str-to-lower (word seq))) > anagrams {}] > (if (empty? seq) > anagrams > (recur (rest seq) > (str-sort (str-to-lower (word (rest seq > (anagram-add anagrams akey (word seq)) > > (defn print-anagram[v] > (println (str (:count (first (rest v))) " " (first v) ": " (:words > (first (rest v)) > > (defn print-anagrams[ana] > (doseq [v ana] > (print-anagram v))) > > (defn anagram-key[elem] (:count (first (rest elem > > (def *words* (f-to-seq "/usr/share/dict/web2")) > (def *anagrams* (sort-by anagram-key (build-anagrams *words*))) > (print-anagrams (take 10 (reverse *anagrams*))) > > ## output > 11 agnor: ("Ronga" "rogan" "organ" "orang" "Orang" "nagor" "groan" > "grano" "goran" "argon" "angor") > 10 aelps: ("speal" "spale" "slape" "sepal" "saple" "salep" "Pales" > "Lepas" "lapse" "Elaps") > 9 eerst: ("tsere" "terse" "stree" "stere" "steer" "reset" "reest" > "estre" "ester") > 9 aeerst: ("teaser" "staree" "seater" "saeter" "reseat" "Eastre" > "easter" "Easter" "asteer") > 9 aacinr: ("narica" "crania" "Crania" "carina" "Carian" "canari" > "Canari" "arnica" "acinar") > 9 acert: ("trace" "recta" "react" "creta" "creat" "crate" "cater" > "carte" "caret") > 8 ailr: ("rial" "rail" "lira" "liar" "lari" "Lari" "lair" "aril") > 8 aelpt: ("tepal" "pleat" "plate" "petal" "pelta" "patel" "palet" > "leapt") > 8 airst: ("Trias" "tisar" "tarsi" "stria" "stair" "sitar" "astir" > "arist") > 8 aemrt: ("Trema" "trame" "terma" "tamer" "ramet" "metra" "mater" > "armet") -- You received this message because you are subscribed to the Google Groups "Clojure" group. To post to this group, send email to clojure@googlegroups.com Note that posts from new members are moderated - please be patient with your first post. To unsubscribe from this group, send email to clojure+unsubscr...@googlegroups.com For more options, visit this group at http://groups.google.com/group/clojure?hl=en
Re: Feedback on idiomatic clojure
Hi, here my turn. Comments inline. Hope this helps. Sincerely Meikel (defn f-to-seq [file] (with-open [rdr (java.io.BufferedReader. (java.io.FileReader. file))] (doall (line-seq rdr ; Yes. doall is required here. Alternatively you can wrap the whole thing ; in the with-open. Then you can process also larger files without keeping ; all data in memory. YMMV. (defn str-sort [string] (when string (apply str (sort string ; One can also skip the when here. Then nil input returns an empty string. ; Whether that is better depends on the usage of the return value. YMMV. ; (if (nil? x) x ...) is usually written as (when x ...). (defn str-to-lower [string] (when string (.toLowerCase string))) (defn anagram-add [anagrams akey word] (if (contains? anagrams akey) (-> anagrams (update-in [akey :count] inc) (update-in [akey :words] conj word)) (assoc anagrams akey {:count 1 :words [word]}))) ; I would prefer vectors over lists to store stuff. You get nice ; things for free, like O(1) appending, O(1) reverse, O(1) random ; access, ... (defn word [s] (first s)) ; or: (def word first) ; not needed for my solution (def normalise (comp str-to-lower str-sort)) (defn build-anagrams [words] (reduce #(anagram-add %1 (normalise %2) %2) {} words)) ; Looping with an accumulator, just returning the latter when the ; input is exhausted cries for reduce. (defn print-anagram [v] (println (str (:count (second v)) " " (first v) ": " (:words (second v) ; one could use destructuring to access things. (defn print-anagram [anagram] (let [[normal-form {:keys [count words]}] anagram] (println (str count " " normal-form ": " words (defn print-anagrams [ana] (doseq [v ana] (print-anagram v))) ; more descriptive names would be good, I guess (defn anagram-key [elem] (- (:count (second elem ; the minus should take care of the reverse? (def *words* (f-to-seq "/usr/share/dict/web2")) (def *anagrams* (sort-by anagram-key (build-anagrams *words*))) (print-anagrams (take 10 *anagrams*)) -- You received this message because you are subscribed to the Google Groups "Clojure" group. To post to this group, send email to clojure@googlegroups.com Note that posts from new members are moderated - please be patient with your first post. To unsubscribe from this group, send email to clojure+unsubscr...@googlegroups.com For more options, visit this group at http://groups.google.com/group/clojure?hl=en
Re: Feedback on idiomatic clojure
A few remarks: > > ## begin > (defn f-to-seq[file] > (with-open [rdr (java.io.BufferedReader. > (java.io.FileReader. file))] > (doall (line-seq rdr > Do you need the doall? > (defn str-sort[str] > (if (nil? str) > str > (String. (into-array (. Character TYPE) (sort str) > (and str (String. (into-array (. Character TYPE) (sort str (if str is nil this will return nil) > (defn anagram-add[anagrams akey word] > (if (empty? (get anagrams akey)) > (assoc anagrams akey (hash-map :count 1, :words (list word))) > (update-in (update-in anagrams [akey :count] inc) [akey :words] > conj word))) > You could use a literal {:count 1 :words (list words)} instead of hash-map I believe that (get anagrams akey) will give nil in case there is known. If it is the case (or (get anagrams key) ...) will be more readable. (update-in ...) => (-> anagrams (update-in [akey :count] inc) (update-in [akey :words] conj word)) I will have a look to the rest of the program later. Best, Nicolas. -- You received this message because you are subscribed to the Google Groups "Clojure" group. To post to this group, send email to clojure@googlegroups.com Note that posts from new members are moderated - please be patient with your first post. To unsubscribe from this group, send email to clojure+unsubscr...@googlegroups.com For more options, visit this group at http://groups.google.com/group/clojure?hl=en
Feedback on idiomatic clojure
Hi Everyone, I'm looking for some feedback on a small get-your-feet wet program I wrote in clojure (my first). In particular, I hope to get feedback on how to better make use of clojure's data structures and functional programming in general. Below is the toy program and the sample output. It reads in words from the OS X dictionary file, catalogs the anagrams, and then prints out the top ten by frequency. Although performance is not my primary concern here, it is a bit slow. It takes about 15s on my mac book pro. As an example, is this the "proper" way to update multiple fields at once in a nested hash-map: (update-in (update-in anagrams [akey :count] inc) [akey :words] conj word))) I understand this as returning an copy of an updated copy of an updated copy. Any feedback would be appreciated. Thanks, Damon ## begin (defn f-to-seq[file] (with-open [rdr (java.io.BufferedReader. (java.io.FileReader. file))] (doall (line-seq rdr (defn str-sort[str] (if (nil? str) str (String. (into-array (. Character TYPE) (sort str) (defn str-to-lower[str] (if (nil? str) str (.toLowerCase str))) (defn anagram-add[anagrams akey word] (if (empty? (get anagrams akey)) (assoc anagrams akey (hash-map :count 1, :words (list word))) (update-in (update-in anagrams [akey :count] inc) [akey :words] conj word))) (defn word[seq] (first seq)) (defn build-anagrams[seq] (loop [seq seq akey (str-sort (str-to-lower (word seq))) anagrams {}] (if (empty? seq) anagrams (recur (rest seq) (str-sort (str-to-lower (word (rest seq (anagram-add anagrams akey (word seq)) (defn print-anagram[v] (println (str (:count (first (rest v))) " " (first v) ": " (:words (first (rest v)) (defn print-anagrams[ana] (doseq [v ana] (print-anagram v))) (defn anagram-key[elem] (:count (first (rest elem (def *words* (f-to-seq "/usr/share/dict/web2")) (def *anagrams* (sort-by anagram-key (build-anagrams *words*))) (print-anagrams (take 10 (reverse *anagrams*))) ## output 11 agnor: ("Ronga" "rogan" "organ" "orang" "Orang" "nagor" "groan" "grano" "goran" "argon" "angor") 10 aelps: ("speal" "spale" "slape" "sepal" "saple" "salep" "Pales" "Lepas" "lapse" "Elaps") 9 eerst: ("tsere" "terse" "stree" "stere" "steer" "reset" "reest" "estre" "ester") 9 aeerst: ("teaser" "staree" "seater" "saeter" "reseat" "Eastre" "easter" "Easter" "asteer") 9 aacinr: ("narica" "crania" "Crania" "carina" "Carian" "canari" "Canari" "arnica" "acinar") 9 acert: ("trace" "recta" "react" "creta" "creat" "crate" "cater" "carte" "caret") 8 ailr: ("rial" "rail" "lira" "liar" "lari" "Lari" "lair" "aril") 8 aelpt: ("tepal" "pleat" "plate" "petal" "pelta" "patel" "palet" "leapt") 8 airst: ("Trias" "tisar" "tarsi" "stria" "stair" "sitar" "astir" "arist") 8 aemrt: ("Trema" "trame" "terma" "tamer" "ramet" "metra" "mater" "armet") -- You received this message because you are subscribed to the Google Groups "Clojure" group. To post to this group, send email to clojure@googlegroups.com Note that posts from new members are moderated - please be patient with your first post. To unsubscribe from this group, send email to clojure+unsubscr...@googlegroups.com For more options, visit this group at http://groups.google.com/group/clojure?hl=en