Hi,

I've embellished my proposed patch a bit:

 - use values resp. call-with-values instead of passing around
   lists.

   This was one thing I didn't like about my first patch candidate:
   the namespace --> ns abbreviation lookup had two things to return,
   for noe the abbreviation, and whether this abbreviation was "new"
   (for convenience in the form of a (namespace . abbreviation) pair).
   Instead of returning a list, now it returns multiple values.

 - patch is now against current stable instead of against "whatever
   Debian stable packages", i.e. against

   d680713 2015-04-03 16:35:54 +0200 Ludovic Courtès (stable-2.0) doc: Update 
libgc URL.

I'm still not sure whether this is the way to go (i.e. mixing the
abbreviation stuff into the serialization), or whether a pre-pass
(replacing namespaces by abbreviations and generating the namespace
declaration "attributes") would be the way to go.

Besides, I'd like to have some input on whether it'd be worth to
follow the usual convention and to put the namespace declarations
before regular attributes (forcing us to do two passes on a tag
node's attribute list). The generated XML looks pretty weird as
is now.

What I'd still like to introduce is a "mapping preference" as an
optional argument by the user, possibly per-node (like "I'd like
'http://www.w3.org/1999/xlink' to be abbreviated as 'xlink' or
something like that). Other XML serializers offer that. I envision
this as a function, the library would fall back to generate the
abbreviation whenever the function returns #f.

The question on whether this patch (or whatever it evolves into)
has a chance of getting into Guile is still open: I'd have to
get my papers from the FSF in this case.

Inputs?
diff --git a/module/sxml/simple.scm b/module/sxml/simple.scm
index 703ad91..86b0784 100644
--- a/module/sxml/simple.scm
+++ b/module/sxml/simple.scm
@@ -215,29 +215,38 @@ port."
          (elements (reverse (parser port '()))))
     `(*TOP* ,@elements)))
 
-(define check-name
-  (let ((*good-cache* (make-hash-table)))
-    (lambda (name)
-      (if (not (hashq-ref *good-cache* name))
-          (let* ((str (symbol->string name))
-                 (i (string-index str #\:))
-                 (head (or (and i (substring str 0 i)) str))
-                 (tail (and i (substring str (1+ i)))))
-            (and i (string-index (substring str (1+ i)) #\:)
-                 (error "Invalid QName: more than one colon" name))
-            (for-each
-             (lambda (s)
-               (and s
-                    (or (char-alphabetic? (string-ref s 0))
-                        (eq? (string-ref s 0) #\_)
-                        (error "Invalid name starting character" s name))
-                    (string-for-each
-                     (lambda (c)
-                       (or (char-alphabetic? c) (string-index "0123456789.-_" c)
-                           (error "Invalid name character" c s name)))
-                     s)))
-             (list head tail))
-            (hashq-set! *good-cache* name #t))))))
+(define (ns-lookup ns nsmap)
+  "Look up namespace ns in nsmap. Return its abbreviation or #f"
+  (assoc-ref nsmap ns))
+
+(define ns-abbr-new
+  (let ((*nscounter* 0))
+    (lambda ()
+      (set! *nscounter* (1+ *nscounter*))
+      (string-append "ns" (number->string *nscounter*)))))
+
+(define (ns-abbr name nsmap)
+  "Takes a QName, SXML style (i.e a symbol whose string value is either a
+clean local name or a colon-concatenated pair of namespace:name, and returns
+two values: the string  <nsabbrev>:<local-name> and either a pair (<namespace> .
+nsabbrev) whenever <namespace> wasn't in nsmap, or #f when it was"
+  ;; FIXME check for empty ns (e.g ":foo")
+  ;; check (worse!) for empty locname (e.g. "foo:")
+  (let* ((str (symbol->string name))
+         (i (string-rindex str #\:))
+         (ns (and i (substring str 0 i)))
+         (locname (or (and i (substring str (1+ i))) str)))
+    (if ns
+        (let ((nsabbr (ns-lookup ns nsmap)))
+          (if nsabbr
+              ;; known namespace:
+              (values (string-append nsabbr ":" locname) #f)
+              ;; unknown namespace
+              (let ((nsabbr (ns-abbr-new)))
+                (values (string-append nsabbr ":" locname)
+                      (cons ns nsabbr)))))
+        ;; empty namespace: clean local-name:
+        (values locname #f))))
 
 ;; The following two functions serialize tags and attributes. They are
 ;; being used in the node handlers for the post-order function, see
@@ -260,42 +269,58 @@ port."
      port))))
 
 (define (attribute->xml attr value port)
-  (check-name attr)
   (display attr port)
   (display "=\"" port)
   (attribute-value->xml value port)
   (display #\" port))
 
-(define (element->xml tag attrs body port)
-  (check-name tag)
-  (display #\< port)
-  (display tag port)
-  (if attrs
-      (let lp ((attrs attrs))
-        (if (pair? attrs)
-            (let ((attr (car attrs)))
+(define (element->xml tag attrs body port nsmap)
+  (let ((new-namespaces '()))
+    (call-with-values (lambda () (ns-abbr tag  nsmap))
+      (lambda (abname new-ns)
+        (when new-ns
+          (set! new-namespaces (cons new-ns new-namespaces)))
+        (display #\< port)
+        (display abname port)
+        (if attrs
+            (let lp ((attrs attrs))
+              (if (pair? attrs)
+                  (let ((attr (car attrs)))
+                    (display #\space port)
+                    (if (pair? attr)
+                        (call-with-values (lambda () (ns-abbr (car attr) nsmap))
+                          (lambda (abname new-ns)
+                            (when new-ns
+                              (set! new-namespaces (cons new-ns new-namespaces)))
+                            (attribute->xml abname (cdr attr) port)))
+                        (error "bad attribute" tag attr))
+                    (lp (cdr attrs)))
+                  (if (not (null? attrs))
+                      (error "bad attributes" tag attrs)))))
+        ;; Output namespace declarations
+        (let lp ((new-namespaces new-namespaces))
+          (unless (null? new-namespaces)
+            ;; remember: car is namespace, cdr is abbrev
+            (let ((ns (caar new-namespaces))
+                  (nsabbr (cdar new-namespaces)))
               (display #\space port)
-              (if (pair? attr)
-                  (attribute->xml (car attr) (cdr attr) port)
-                  (error "bad attribute" tag attr))
-              (lp (cdr attrs)))
-            (if (not (null? attrs))
-                (error "bad attributes" tag attrs)))))
-  (if (pair? body)
-      (begin
-        (display #\> port)
-        (let lp ((body body))
-          (cond
-           ((pair? body)
-            (sxml->xml (car body) port)
-            (lp (cdr body)))
-           ((null? body)
-            (display "</" port)
-            (display tag port)
-            (display ">" port))
-           (else
-            (error "bad element body" tag body)))))
-      (display " />" port)))
+              (attribute->xml (string-append "xmlns:" nsabbr) ns port))
+            (lp (cdr new-namespaces))))
+        (if (pair? body)
+            (begin
+              (display #\> port)
+              (let lp ((body body))
+                (cond
+                 ((pair? body)
+                  (sxml->xml (car body) port (append new-namespaces nsmap))
+                  (lp (cdr body)))
+                 ((null? body)
+                  (display "</" port)
+                  (display abname port)
+                  (display ">" port))
+                 (else
+                  (error "bad element body" tag body)))))
+            (display " />" port))))))
 
 ;; FIXME: ensure name is valid
 (define (entity->xml name port)
@@ -311,7 +336,8 @@ port."
   (display str port)
   (display "?>" port))
 
-(define* (sxml->xml tree #:optional (port (current-output-port)))
+(define* (sxml->xml tree #:optional (port (current-output-port))
+                    (nsmap '()))
   "Serialize the sxml tree @var{tree} as XML. The output will be written
 to the current output port, unless the optional argument @var{port} is
 present."
@@ -322,7 +348,7 @@ present."
         (let ((tag (car tree)))
           (case tag
             ((*TOP*)
-             (sxml->xml (cdr tree) port))
+             (sxml->xml (cdr tree) port nsmap))
             ((*ENTITY*)
              (if (and (list? (cdr tree)) (= (length (cdr tree)) 1))
                  (entity->xml (cadr tree) port)
@@ -336,9 +362,9 @@ present."
                     (attrs (and (pair? elems) (pair? (car elems))
                                 (eq? '@ (caar elems))
                                 (cdar elems))))
-               (element->xml tag attrs (if attrs (cdr elems) elems) port)))))
+               (element->xml tag attrs (if attrs (cdr elems) elems) port nsmap)))))
         ;; A nodelist.
-        (for-each (lambda (x) (sxml->xml x port)) tree)))
+        (for-each (lambda (x) (sxml->xml x port nsmap)) tree)))
    ((string? tree)
     (string->escaped-xml tree port))
    ((null? tree) *unspecified*)

Attachment: signature.asc
Description: Digital signature

Reply via email to