Hi, I've embellished my proposed patch a bit:
- use values resp. call-with-values instead of passing around lists. This was one thing I didn't like about my first patch candidate: the namespace --> ns abbreviation lookup had two things to return, for noe the abbreviation, and whether this abbreviation was "new" (for convenience in the form of a (namespace . abbreviation) pair). Instead of returning a list, now it returns multiple values. - patch is now against current stable instead of against "whatever Debian stable packages", i.e. against d680713 2015-04-03 16:35:54 +0200 Ludovic Courtès (stable-2.0) doc: Update libgc URL. I'm still not sure whether this is the way to go (i.e. mixing the abbreviation stuff into the serialization), or whether a pre-pass (replacing namespaces by abbreviations and generating the namespace declaration "attributes") would be the way to go. Besides, I'd like to have some input on whether it'd be worth to follow the usual convention and to put the namespace declarations before regular attributes (forcing us to do two passes on a tag node's attribute list). The generated XML looks pretty weird as is now. What I'd still like to introduce is a "mapping preference" as an optional argument by the user, possibly per-node (like "I'd like 'http://www.w3.org/1999/xlink' to be abbreviated as 'xlink' or something like that). Other XML serializers offer that. I envision this as a function, the library would fall back to generate the abbreviation whenever the function returns #f. The question on whether this patch (or whatever it evolves into) has a chance of getting into Guile is still open: I'd have to get my papers from the FSF in this case. Inputs?
diff --git a/module/sxml/simple.scm b/module/sxml/simple.scm index 703ad91..86b0784 100644 --- a/module/sxml/simple.scm +++ b/module/sxml/simple.scm @@ -215,29 +215,38 @@ port." (elements (reverse (parser port '())))) `(*TOP* ,@elements))) -(define check-name - (let ((*good-cache* (make-hash-table))) - (lambda (name) - (if (not (hashq-ref *good-cache* name)) - (let* ((str (symbol->string name)) - (i (string-index str #\:)) - (head (or (and i (substring str 0 i)) str)) - (tail (and i (substring str (1+ i))))) - (and i (string-index (substring str (1+ i)) #\:) - (error "Invalid QName: more than one colon" name)) - (for-each - (lambda (s) - (and s - (or (char-alphabetic? (string-ref s 0)) - (eq? (string-ref s 0) #\_) - (error "Invalid name starting character" s name)) - (string-for-each - (lambda (c) - (or (char-alphabetic? c) (string-index "0123456789.-_" c) - (error "Invalid name character" c s name))) - s))) - (list head tail)) - (hashq-set! *good-cache* name #t)))))) +(define (ns-lookup ns nsmap) + "Look up namespace ns in nsmap. Return its abbreviation or #f" + (assoc-ref nsmap ns)) + +(define ns-abbr-new + (let ((*nscounter* 0)) + (lambda () + (set! *nscounter* (1+ *nscounter*)) + (string-append "ns" (number->string *nscounter*))))) + +(define (ns-abbr name nsmap) + "Takes a QName, SXML style (i.e a symbol whose string value is either a +clean local name or a colon-concatenated pair of namespace:name, and returns +two values: the string <nsabbrev>:<local-name> and either a pair (<namespace> . +nsabbrev) whenever <namespace> wasn't in nsmap, or #f when it was" + ;; FIXME check for empty ns (e.g ":foo") + ;; check (worse!) for empty locname (e.g. "foo:") + (let* ((str (symbol->string name)) + (i (string-rindex str #\:)) + (ns (and i (substring str 0 i))) + (locname (or (and i (substring str (1+ i))) str))) + (if ns + (let ((nsabbr (ns-lookup ns nsmap))) + (if nsabbr + ;; known namespace: + (values (string-append nsabbr ":" locname) #f) + ;; unknown namespace + (let ((nsabbr (ns-abbr-new))) + (values (string-append nsabbr ":" locname) + (cons ns nsabbr))))) + ;; empty namespace: clean local-name: + (values locname #f)))) ;; The following two functions serialize tags and attributes. They are ;; being used in the node handlers for the post-order function, see @@ -260,42 +269,58 @@ port." port)))) (define (attribute->xml attr value port) - (check-name attr) (display attr port) (display "=\"" port) (attribute-value->xml value port) (display #\" port)) -(define (element->xml tag attrs body port) - (check-name tag) - (display #\< port) - (display tag port) - (if attrs - (let lp ((attrs attrs)) - (if (pair? attrs) - (let ((attr (car attrs))) +(define (element->xml tag attrs body port nsmap) + (let ((new-namespaces '())) + (call-with-values (lambda () (ns-abbr tag nsmap)) + (lambda (abname new-ns) + (when new-ns + (set! new-namespaces (cons new-ns new-namespaces))) + (display #\< port) + (display abname port) + (if attrs + (let lp ((attrs attrs)) + (if (pair? attrs) + (let ((attr (car attrs))) + (display #\space port) + (if (pair? attr) + (call-with-values (lambda () (ns-abbr (car attr) nsmap)) + (lambda (abname new-ns) + (when new-ns + (set! new-namespaces (cons new-ns new-namespaces))) + (attribute->xml abname (cdr attr) port))) + (error "bad attribute" tag attr)) + (lp (cdr attrs))) + (if (not (null? attrs)) + (error "bad attributes" tag attrs))))) + ;; Output namespace declarations + (let lp ((new-namespaces new-namespaces)) + (unless (null? new-namespaces) + ;; remember: car is namespace, cdr is abbrev + (let ((ns (caar new-namespaces)) + (nsabbr (cdar new-namespaces))) (display #\space port) - (if (pair? attr) - (attribute->xml (car attr) (cdr attr) port) - (error "bad attribute" tag attr)) - (lp (cdr attrs))) - (if (not (null? attrs)) - (error "bad attributes" tag attrs))))) - (if (pair? body) - (begin - (display #\> port) - (let lp ((body body)) - (cond - ((pair? body) - (sxml->xml (car body) port) - (lp (cdr body))) - ((null? body) - (display "</" port) - (display tag port) - (display ">" port)) - (else - (error "bad element body" tag body))))) - (display " />" port))) + (attribute->xml (string-append "xmlns:" nsabbr) ns port)) + (lp (cdr new-namespaces)))) + (if (pair? body) + (begin + (display #\> port) + (let lp ((body body)) + (cond + ((pair? body) + (sxml->xml (car body) port (append new-namespaces nsmap)) + (lp (cdr body))) + ((null? body) + (display "</" port) + (display abname port) + (display ">" port)) + (else + (error "bad element body" tag body))))) + (display " />" port)))))) ;; FIXME: ensure name is valid (define (entity->xml name port) @@ -311,7 +336,8 @@ port." (display str port) (display "?>" port)) -(define* (sxml->xml tree #:optional (port (current-output-port))) +(define* (sxml->xml tree #:optional (port (current-output-port)) + (nsmap '())) "Serialize the sxml tree @var{tree} as XML. The output will be written to the current output port, unless the optional argument @var{port} is present." @@ -322,7 +348,7 @@ present." (let ((tag (car tree))) (case tag ((*TOP*) - (sxml->xml (cdr tree) port)) + (sxml->xml (cdr tree) port nsmap)) ((*ENTITY*) (if (and (list? (cdr tree)) (= (length (cdr tree)) 1)) (entity->xml (cadr tree) port) @@ -336,9 +362,9 @@ present." (attrs (and (pair? elems) (pair? (car elems)) (eq? '@ (caar elems)) (cdar elems)))) - (element->xml tag attrs (if attrs (cdr elems) elems) port))))) + (element->xml tag attrs (if attrs (cdr elems) elems) port nsmap))))) ;; A nodelist. - (for-each (lambda (x) (sxml->xml x port)) tree))) + (for-each (lambda (x) (sxml->xml x port nsmap)) tree))) ((string? tree) (string->escaped-xml tree port)) ((null? tree) *unspecified*)
signature.asc
Description: Digital signature