Could you throw this on github, so we can easily follow along with
improvements?

On Mar 22, 8:25 pm, "zoglma...@gmail.com" <zoglma...@gmail.com> wrote:
> While playing around and implementing straight up Humman compression,
> I wrote a handful of utilities to conveinently play with byte and bit
> streams because I didn't see anything too helpful in the mmap and
> duck_stream files.
>
> What I wrote would need to be changed to better work with the existing
> code and to increase performance, but does anyone think that it would
> be helpful to add these things for playing with binary data into
> contrib?
>
> ; to exercise everything -- use this to copy a file -- obviously
> change the files
> (time (to-file "/Users/kaz/Desktop/Programming Clojure-copy.pdf" (bit-
> to-byte-stream (byte-to-bit-stream (to-byte-stream "/Users/kaz/Desktop/
> Programming Clojure.pdf")))))
>
> ; to play with Huffman compress/decompression
> (compress-file "/Users/kaz/Desktop/onlisp.pdf" "/Users/kaz/Desktop/
> onlisp.pdf.compress")
> (uncompress-file "/Users/kaz/Desktop/onlisp.pdf.compress" "/Users/kaz/
> Desktop/onlisp2.pdf")
>
> ;-------------------------------------------------------------------------- 
> ----------------------
> ;-------------- setup files to act like streams at the bit level
> -------------
> ;-------------------------------------------------------------------------- 
> ----------------------
> (defn array-to-list [arr list-size]
>    (loop [index 0 accum []]
>      (if (or (= index (alength arr)) (= index list-size))
>        accum
>        (recur (inc index) (conj accum (aget arr index))))))
>
> (defn list-to-array [l array-type arr-size]
>   (let [arr-size (min arr-size (count l))
>         arr (make-array array-type arr-size)]
>     (loop [index 0 l l]
>       (if (= index arr-size)
>         arr
>         (do
>           (aset arr index (first l))
>           (recur (inc index) (rest l)))))))
>
> (defn num-to-bits [num size]
>   (loop [num num size size accum nil]
>     (if (= size 0)
>       accum
>       (let [next-n (bit-shift-right num 1)
>             next-bit (bit-xor num (bit-shift-left next-n 1))]
>         (recur next-n (dec size) (cons next-bit accum))))))
>
> (import '(java.io FileInputStream FileOutputStream))
> (defn to-byte-stream [filename]
>    (let [bufsize 65536
>         is (FileInputStream. filename)
>         read-buf
>            (fn []
>              (let [buf (make-array (Byte/TYPE) bufsize)
>                    num-read (.read is buf 0 bufsize)]
>                (if (= num-read -1)
>                  (do
>                    (.close is)
>                    nil)
>                  (array-to-list buf num-read))))
>         read-all
>            (fn read-all []
>              (let [next-buf (read-buf)]
>                (if (nil? next-buf)
>                  nil
>                  (lazy-cat next-buf (read-all)))))]
>      (read-all)))
>
> (defn byte-to-bit-stream [l]
>    (if (nil? l)
>      nil
>      (lazy-cat (num-to-bits (first l) 8) (byte-to-bit-stream (rest
> l)))))
>
> (defn bits-to-num [l num-bit-size]
>    (let [cnt (count (take num-bit-size l))]
>      (if (< cnt num-bit-size)
>         (bits-to-num (lazy-cat l (take (- num-bit-size cnt) (repeat
> 0))) num-bit-size)
>        (loop [l l size num-bit-size accum 0]
>         (if (= size 0)
>           (list accum l)
>           (recur (rest l) (dec size) (bit-or (bit-shift-left accum 1)
> (first l))))))))
>
> (defn bit-to-byte-stream [l]
>    (if (nil? l)
>      nil
>      (let [[next-byte rst] (bits-to-num l 8)]
>        (lazy-cons next-byte (bit-to-byte-stream rst)))))
>
> (defn to-file [filename byte-stream]
>   (let [os (FileOutputStream. filename)]
>     (loop [bytes byte-stream bytes-written 0]
>       (if (nil? bytes)
>         (do
>           (.close os)
>           bytes-written)
>         (let [[bytes rest-bytes] (split-at 65536 bytes)
>               buf (list-to-array (map byte bytes) (Byte/TYPE) 65536)
>               wrote-now (alength buf)]
>           (do
>              (.write os buf 0 wrote-now)
>              (recur rest-bytes (+ bytes-written wrote-now))))))))
>
> ;-------------------------------------------------------------------------- 
> ----------------------
> ;-------------- Using byte/bit streams to do huffman -------------
> ;-------------------------------------------------------------------------- 
> ----------------------
>
> (defn get-occurances [l]
>  (loop [l l accum {}]
>    (if (empty? l)
>      accum
>      (let [next (first l)
>             current-count (if (nil? (accum next)) 0 (accum next))
>             next-accum (assoc accum next (inc current-count))]
>         (recur (rest l) next-accum)))))
>
> (defn tree-has-children? [tree]
>  (and (not (nil? tree)) (or (not (nil? (tree :left-child))) (not (nil?
> (tree :right-child))))))
> (defn tree-has-lchild? [tree]
>  (and (not (nil? tree)) (not (nil? (tree :left-child)))))
> (defn tree-has-rchild? [tree]
>  (and (not (nil? tree)) (not (nil? (tree :right-child)))))
> (defn tree-count [tree]
>  (if (nil? tree)
>    0
>    (+ 1
>       (if (tree-has-lchild? tree) (tree-count (tree :left-child)) 0)
>       (if (tree-has-rchild? tree) (tree-count (tree :right-child))
> 0))))
>
> (defn get-hufftree [occurances]
>  (let [build (fn build [occurances]
>                 (let [sorted-occurances (sort-by (fn [[tree val num-occur]] 
> num-
> occur) occurances)]
>                   (cond (nil? sorted-occurances)
>                         nil
>                         (nil? (frest sorted-occurances))
>                         (let [[tree val occur] (first sorted-occurances)]
>                           (if (nil? tree)
>                             {:type :leaf :data val :num-occurances occur}
>                             tree))
>                         :else
>                         (let [[tree1 val1 occur1] (first sorted-occurances)
>                               tree1 (if (nil? tree1) {:type :leaf :data val1 
> :num-
> occurances occur1} tree1)
>                               [tree2 val2 occur2] (frest sorted-occurances)
>                               tree2 (if (nil? tree2) {:type :leaf :data val2 
> :num-
> occurances occur2} tree2)
>                               combined-occur (+ occur1 occur2)
>                               tree1-cnt (tree-count tree1)
>                               tree2-cnt (tree-count tree2)
>                               left-tree (cond (and (= tree1-cnt tree2-cnt) (= 
> :leaf
> (tree1 :type)) (= :leaf (tree2 :type)))
>                                               (if (> occur1 occur2)
>                                                 tree2
>                                                 tree1)
>                                               (> tree1-cnt tree2-cnt)
>                                               tree1
>                                               :else
>                                               tree2)
>                               right-tree (if (= left-tree tree1) tree2 tree1)
>                               combined-tree {:type :node
>                                              :num-occurances combined-occur
>                                              :left-child left-tree 
> :right-child right-tree}
>                               lst (cons (list combined-tree nil 
> combined-occur) (rrest
> sorted-occurances))]
>                           (build lst)))))]
>    (build (map #(cons nil %1) occurances))))
>
> (defn print-tree [tree]
> (if (nil? tree)
>   (println "tree is nil")
>   (do
>     (println "data" (tree :data))
>     (println "--right-child--")
>     (print-tree (tree :right-child))
>     (println "--left-child--")
>     (print-tree (tree :left-child)))))
>
> (defn map-tree-preorder [f tree]
>  (cond (nil? tree)
>            nil
>         (= :leaf (tree :type))
>            (list (f tree))
>         :else
>            (let [accum-1 (map-tree-preorder f (tree :left-child))
>                  accum-2 (lazy-cat accum-1 (map-tree-preorder f (tree :right-
> child)))
>                  accum-3 (lazy-cat accum-2 (list (f tree)))]
>              accum-3)))
>
> (defn tree-pre-index [root tree]
>  (loop [l (map-tree-preorder identity root) cnt 0]
>    (cond (nil? l)
>              nil
>           (= (first l) tree)
>              cnt
>           :else
>              (recur (rest l) (inc cnt)))))
>
> (defn hufftree-to-bits [htree]
>  (let [leaf-as-bits
>         (fn [t]
>           (lazy-cat (list 1) (num-to-bits (t :data) 9)))
>         node-as-bits
>         (fn [t]
>           (lazy-cat
>                 (list 0)
>                 (num-to-bits (tree-pre-index htree (t :left-child)) 9)
>                 (num-to-bits (tree-pre-index htree (t :right-child)) 9)))]
>  (reduce concat nil (map-tree-preorder #(if (= :leaf (%1 :type)) (leaf-
> as-bits %1) (node-as-bits %1)) htree))
> ))
>
> (defn bits-to-hufftree [bits num-nodes]
>  (loop [accum nil bits bits num-nodes num-nodes]
>    (if (= 0 num-nodes)
>      (list (last accum) bits)
>      (let [[[next-bit] rest-bits] (split-at 1 bits)]
>         (if (= 1 next-bit)
>           (let [[data rest-bits] (bits-to-num rest-bits 9)
>                 leaf {:type :leaf :data data :pre-index (count accum)}]
>             (recur (lazy-cat accum (list leaf)) rest-bits (dec num-nodes)))
>           (let [[left-child-index rest-bits] (bits-to-num rest-bits 9)
>                 left-child (nth accum left-child-index)
>                 [right-child-index rest-bits] (bits-to-num rest-bits 9)
>                 right-child (nth accum right-child-index)
>                 node {:type :node :pre-index (count accum) :right-child right-
> child :left-child left-child}]
>             (recur (lazy-cat accum (list node)) rest-bits (dec num-
> nodes))))))))
>
> (defn applyHuffForEntry [tree entry]
>  (let [path (reverse
>   ((fn ahfe [tree path]
>      (cond (nil? tree)
>                nil
>             (= (tree :data) entry)
>                path
>             :else
>                (let [left-path (ahfe (tree :left-child) (cons 0 path))]
>                  (if (nil? left-path)
>                    (ahfe (tree :right-child) (cons 1 path))
>                    left-path)))) tree nil))]
>    (if (nil? path)
>      (throw (IllegalArgumentException. "entry not found!"))
>      path)))
>
> (defn unapplyHuffForEntry [tree bits]
> (loop [tree tree bits bits]
>   (let [next-bit (first bits)
>           right-child (if (nil? tree) nil (tree :right-child))
>           left-child (if (nil? tree) nil (tree :left-child))]
>     (cond (nil? tree)
>                (throw (IllegalArgumentException. "Tree must not be null!"))
>             (= 1 next-bit)
>                (if (= :leaf (right-child :type))
>                  (list (right-child :data) (rest bits))
>                  (recur right-child (rest bits)))
>             (= 0 next-bit)
>                (if (= :leaf (left-child :type))
>                  (list (left-child :data) (rest bits))
>                  (recur left-child (rest bits)))))))
>
> (defn applyHuffForEntries [tree entries]
>  (if (nil? entries)
>    nil
>    (lazy-cat (applyHuffForEntry tree (first entries))
>               (applyHuffForEntries tree (rest entries)))))
>
> (defn unapplyHuffForEntries [tree bits num-expect]
> (cond
>  (= num-expect 0)
>     nil
>  (nil? bits)
>     (throw (IllegalArgumentException. (str "Too few bits. Still
> expect: " num-expect)))
>  :else
>     (let [[data remaining-bits] (unapplyHuffForEntry tree bits)]
>         (lazy-cons data (unapplyHuffForEntries tree remaining-bits (dec num-
> expect))))))
>
> (defn compress-file [in-filename out-filename]
>  (let [contents (to-byte-stream in-filename) ;bad to hold in memory
>         byte-count (count contents)
>         huff-tree (get-hufftree (get-occurances contents))
>         num-nodes (tree-count huff-tree)
>         huff-tree-bits (hufftree-to-bits huff-tree)
>         compress-bits (applyHuffForEntries huff-tree contents)
>         file-bits (lazy-cat (num-to-bits byte-count 63) (num-to-bits num-
> nodes 9) huff-tree-bits compress-bits)]
>    (do
> ;      (println "original num bits" (* byte-count 8))
> ;      (println "num-nodes" count)
> ;      (print-hufftree huff-tree)
> ;      (println "size huff-tree-bits" (count huff-tree-bits))
> ;      (println "size compress-bits" (count compress-bits))
>      (to-file out-filename (bit-to-byte-stream file-bits))
> )))
>
> (defn uncompress-file [in-filename out-filename]
>  (let [bit-contents (byte-to-bit-stream (to-byte-stream in-filename))
>         [byte-count rest-bits] (bits-to-num bit-contents 63)
>         [num-nodes rest-bits] (bits-to-num rest-bits 9)
>         [huff-tree rest-bits] (bits-to-hufftree rest-bits num-nodes)
>         uncompress-bytes (unapplyHuffForEntries huff-tree rest-bits byte-
> count)]
>    (do
>      (to-file out-filename uncompress-bytes))))
--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups 
"Clojure" group.
To post to this group, send email to clojure@googlegroups.com
To unsubscribe from this group, send email to 
clojure+unsubscr...@googlegroups.com
For more options, visit this group at 
http://groups.google.com/group/clojure?hl=en
-~----------~----~----~----~------~----~------~--~---

Reply via email to