While playing around and implementing straight up Humman compression,
I wrote a handful of utilities to conveinently play with byte and bit
streams because I didn't see anything too helpful in the mmap and
duck_stream files.

What I wrote would need to be changed to better work with the existing
code and to increase performance, but does anyone think that it would
be helpful to add these things for playing with binary data into
contrib?

; to exercise everything -- use this to copy a file -- obviously
change the files
(time (to-file "/Users/kaz/Desktop/Programming Clojure-copy.pdf" (bit-
to-byte-stream (byte-to-bit-stream (to-byte-stream "/Users/kaz/Desktop/
Programming Clojure.pdf")))))

; to play with Huffman compress/decompression
(compress-file "/Users/kaz/Desktop/onlisp.pdf" "/Users/kaz/Desktop/
onlisp.pdf.compress")
(uncompress-file "/Users/kaz/Desktop/onlisp.pdf.compress" "/Users/kaz/
Desktop/onlisp2.pdf")

;------------------------------------------------------------------------------------------------
;-------------- setup files to act like streams at the bit level
-------------
;------------------------------------------------------------------------------------------------
(defn array-to-list [arr list-size]
   (loop [index 0 accum []]
     (if (or (= index (alength arr)) (= index list-size))
       accum
       (recur (inc index) (conj accum (aget arr index))))))

(defn list-to-array [l array-type arr-size]
  (let [arr-size (min arr-size (count l))
        arr (make-array array-type arr-size)]
    (loop [index 0 l l]
      (if (= index arr-size)
        arr
        (do
          (aset arr index (first l))
          (recur (inc index) (rest l)))))))

(defn num-to-bits [num size]
  (loop [num num size size accum nil]
    (if (= size 0)
      accum
      (let [next-n (bit-shift-right num 1)
            next-bit (bit-xor num (bit-shift-left next-n 1))]
        (recur next-n (dec size) (cons next-bit accum))))))

(import '(java.io FileInputStream FileOutputStream))
(defn to-byte-stream [filename]
   (let [bufsize 65536
        is (FileInputStream. filename)
        read-buf
           (fn []
             (let [buf (make-array (Byte/TYPE) bufsize)
                   num-read (.read is buf 0 bufsize)]
               (if (= num-read -1)
                 (do
                   (.close is)
                   nil)
                 (array-to-list buf num-read))))
        read-all
           (fn read-all []
             (let [next-buf (read-buf)]
               (if (nil? next-buf)
                 nil
                 (lazy-cat next-buf (read-all)))))]
     (read-all)))

(defn byte-to-bit-stream [l]
   (if (nil? l)
     nil
     (lazy-cat (num-to-bits (first l) 8) (byte-to-bit-stream (rest
l)))))

(defn bits-to-num [l num-bit-size]
   (let [cnt (count (take num-bit-size l))]
     (if (< cnt num-bit-size)
        (bits-to-num (lazy-cat l (take (- num-bit-size cnt) (repeat
0))) num-bit-size)
       (loop [l l size num-bit-size accum 0]
        (if (= size 0)
          (list accum l)
          (recur (rest l) (dec size) (bit-or (bit-shift-left accum 1)
(first l))))))))

(defn bit-to-byte-stream [l]
   (if (nil? l)
     nil
     (let [[next-byte rst] (bits-to-num l 8)]
       (lazy-cons next-byte (bit-to-byte-stream rst)))))

(defn to-file [filename byte-stream]
  (let [os (FileOutputStream. filename)]
    (loop [bytes byte-stream bytes-written 0]
      (if (nil? bytes)
        (do
          (.close os)
          bytes-written)
        (let [[bytes rest-bytes] (split-at 65536 bytes)
              buf (list-to-array (map byte bytes) (Byte/TYPE) 65536)
              wrote-now (alength buf)]
          (do
             (.write os buf 0 wrote-now)
             (recur rest-bytes (+ bytes-written wrote-now))))))))

;------------------------------------------------------------------------------------------------
;-------------- Using byte/bit streams to do huffman -------------
;------------------------------------------------------------------------------------------------

(defn get-occurances [l]
 (loop [l l accum {}]
   (if (empty? l)
     accum
     (let [next (first l)
            current-count (if (nil? (accum next)) 0 (accum next))
            next-accum (assoc accum next (inc current-count))]
        (recur (rest l) next-accum)))))


(defn tree-has-children? [tree]
 (and (not (nil? tree)) (or (not (nil? (tree :left-child))) (not (nil?
(tree :right-child))))))
(defn tree-has-lchild? [tree]
 (and (not (nil? tree)) (not (nil? (tree :left-child)))))
(defn tree-has-rchild? [tree]
 (and (not (nil? tree)) (not (nil? (tree :right-child)))))
(defn tree-count [tree]
 (if (nil? tree)
   0
   (+ 1
      (if (tree-has-lchild? tree) (tree-count (tree :left-child)) 0)
      (if (tree-has-rchild? tree) (tree-count (tree :right-child))
0))))

(defn get-hufftree [occurances]
 (let [build (fn build [occurances]
                (let [sorted-occurances (sort-by (fn [[tree val num-occur]] num-
occur) occurances)]
                  (cond (nil? sorted-occurances)
                        nil
                        (nil? (frest sorted-occurances))
                        (let [[tree val occur] (first sorted-occurances)]
                          (if (nil? tree)
                            {:type :leaf :data val :num-occurances occur}
                            tree))
                        :else
                        (let [[tree1 val1 occur1] (first sorted-occurances)
                              tree1 (if (nil? tree1) {:type :leaf :data val1 
:num-
occurances occur1} tree1)
                              [tree2 val2 occur2] (frest sorted-occurances)
                              tree2 (if (nil? tree2) {:type :leaf :data val2 
:num-
occurances occur2} tree2)
                              combined-occur (+ occur1 occur2)
                              tree1-cnt (tree-count tree1)
                              tree2-cnt (tree-count tree2)
                              left-tree (cond (and (= tree1-cnt tree2-cnt) (= 
:leaf
(tree1 :type)) (= :leaf (tree2 :type)))
                                              (if (> occur1 occur2)
                                                tree2
                                                tree1)
                                              (> tree1-cnt tree2-cnt)
                                              tree1
                                              :else
                                              tree2)
                              right-tree (if (= left-tree tree1) tree2 tree1)
                              combined-tree {:type :node
                                             :num-occurances combined-occur
                                             :left-child left-tree :right-child 
right-tree}
                              lst (cons (list combined-tree nil combined-occur) 
(rrest
sorted-occurances))]
                          (build lst)))))]
   (build (map #(cons nil %1) occurances))))


(defn print-tree [tree]
(if (nil? tree)
  (println "tree is nil")
  (do
    (println "data" (tree :data))
    (println "--right-child--")
    (print-tree (tree :right-child))
    (println "--left-child--")
    (print-tree (tree :left-child)))))


(defn map-tree-preorder [f tree]
 (cond (nil? tree)
           nil
        (= :leaf (tree :type))
           (list (f tree))
        :else
           (let [accum-1 (map-tree-preorder f (tree :left-child))
                 accum-2 (lazy-cat accum-1 (map-tree-preorder f (tree :right-
child)))
                 accum-3 (lazy-cat accum-2 (list (f tree)))]
             accum-3)))

(defn tree-pre-index [root tree]
 (loop [l (map-tree-preorder identity root) cnt 0]
   (cond (nil? l)
             nil
          (= (first l) tree)
             cnt
          :else
             (recur (rest l) (inc cnt)))))

(defn hufftree-to-bits [htree]
 (let [leaf-as-bits
        (fn [t]
          (lazy-cat (list 1) (num-to-bits (t :data) 9)))
        node-as-bits
        (fn [t]
          (lazy-cat
                (list 0)
                (num-to-bits (tree-pre-index htree (t :left-child)) 9)
                (num-to-bits (tree-pre-index htree (t :right-child)) 9)))]
 (reduce concat nil (map-tree-preorder #(if (= :leaf (%1 :type)) (leaf-
as-bits %1) (node-as-bits %1)) htree))
))


(defn bits-to-hufftree [bits num-nodes]
 (loop [accum nil bits bits num-nodes num-nodes]
   (if (= 0 num-nodes)
     (list (last accum) bits)
     (let [[[next-bit] rest-bits] (split-at 1 bits)]
        (if (= 1 next-bit)
          (let [[data rest-bits] (bits-to-num rest-bits 9)
                leaf {:type :leaf :data data :pre-index (count accum)}]
            (recur (lazy-cat accum (list leaf)) rest-bits (dec num-nodes)))
          (let [[left-child-index rest-bits] (bits-to-num rest-bits 9)
                left-child (nth accum left-child-index)
                [right-child-index rest-bits] (bits-to-num rest-bits 9)
                right-child (nth accum right-child-index)
                node {:type :node :pre-index (count accum) :right-child right-
child :left-child left-child}]
            (recur (lazy-cat accum (list node)) rest-bits (dec num-
nodes))))))))


(defn applyHuffForEntry [tree entry]
 (let [path (reverse
  ((fn ahfe [tree path]
     (cond (nil? tree)
               nil
            (= (tree :data) entry)
               path
            :else
               (let [left-path (ahfe (tree :left-child) (cons 0 path))]
                 (if (nil? left-path)
                   (ahfe (tree :right-child) (cons 1 path))
                   left-path)))) tree nil))]
   (if (nil? path)
     (throw (IllegalArgumentException. "entry not found!"))
     path)))


(defn unapplyHuffForEntry [tree bits]
(loop [tree tree bits bits]
  (let [next-bit (first bits)
          right-child (if (nil? tree) nil (tree :right-child))
          left-child (if (nil? tree) nil (tree :left-child))]
    (cond (nil? tree)
               (throw (IllegalArgumentException. "Tree must not be null!"))
            (= 1 next-bit)
               (if (= :leaf (right-child :type))
                 (list (right-child :data) (rest bits))
                 (recur right-child (rest bits)))
            (= 0 next-bit)
               (if (= :leaf (left-child :type))
                 (list (left-child :data) (rest bits))
                 (recur left-child (rest bits)))))))


(defn applyHuffForEntries [tree entries]
 (if (nil? entries)
   nil
   (lazy-cat (applyHuffForEntry tree (first entries))
              (applyHuffForEntries tree (rest entries)))))

(defn unapplyHuffForEntries [tree bits num-expect]
(cond
 (= num-expect 0)
    nil
 (nil? bits)
    (throw (IllegalArgumentException. (str "Too few bits. Still
expect: " num-expect)))
 :else
    (let [[data remaining-bits] (unapplyHuffForEntry tree bits)]
        (lazy-cons data (unapplyHuffForEntries tree remaining-bits (dec num-
expect))))))


(defn compress-file [in-filename out-filename]
 (let [contents (to-byte-stream in-filename) ;bad to hold in memory
        byte-count (count contents)
        huff-tree (get-hufftree (get-occurances contents))
        num-nodes (tree-count huff-tree)
        huff-tree-bits (hufftree-to-bits huff-tree)
        compress-bits (applyHuffForEntries huff-tree contents)
        file-bits (lazy-cat (num-to-bits byte-count 63) (num-to-bits num-
nodes 9) huff-tree-bits compress-bits)]
   (do
;      (println "original num bits" (* byte-count 8))
;      (println "num-nodes" count)
;      (print-hufftree huff-tree)
;      (println "size huff-tree-bits" (count huff-tree-bits))
;      (println "size compress-bits" (count compress-bits))
     (to-file out-filename (bit-to-byte-stream file-bits))
)))


(defn uncompress-file [in-filename out-filename]
 (let [bit-contents (byte-to-bit-stream (to-byte-stream in-filename))
        [byte-count rest-bits] (bits-to-num bit-contents 63)
        [num-nodes rest-bits] (bits-to-num rest-bits 9)
        [huff-tree rest-bits] (bits-to-hufftree rest-bits num-nodes)
        uncompress-bytes (unapplyHuffForEntries huff-tree rest-bits byte-
count)]
   (do
     (to-file out-filename uncompress-bytes))))


--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups 
"Clojure" group.
To post to this group, send email to clojure@googlegroups.com
To unsubscribe from this group, send email to 
clojure+unsubscr...@googlegroups.com
For more options, visit this group at 
http://groups.google.com/group/clojure?hl=en
-~----------~----~----~----~------~----~------~--~---

Reply via email to