Re: problem reading XML

2008-08-09 Thread Tomas Hlavaty
Hi Alex,

> I suggest to slightly modify that loop
>
>  (make
> (loop
>(NIL (link (char)) (quit "Unbalanced CDATA"))
>(T (= '`(chop "]]>") (tail 3 (made ) ) ) )

I changed the error message to (quit "Unbalanced XML CDATA") so that I
can use (catch '("XML") ...)

I also added the NIL case to prevent infinite loop in DOCTYPE:

(loop
   (T (= "]" (setq X (from "]" "\"" "'" ""))
  (NIL (quit "Unbalanced XML DOCTYPE")) ) )

> Thanks! I include again a new version with the above fix.

The updated file is attached.

Cheers,

Tomas



xml.l
Description: Binary data


Re: problem reading XML

2008-08-08 Thread Alexander Burger
Hi Tomas,

>(from "]]>"))
> 
> was necessary in the parent process because (echo "]]>") in the child
> process did not seem to affect stdin of the parent process and all

Ah, yes. That's what happens. Parent and child have independent I/O
structures.


Thinking about it, I just found an error in my solution. Though it works
well for well-formed CDATA clauses, it will go into an endless loop if
the matching "]]>" is not found. Therefore, I suggest to slightly modify
that loop from

 (make
(until (= '`(chop "]]>") (tail 3 (made)))
   (link (char)) ) )

to

 (make
(loop
   (NIL (link (char)) (quit "Unbalanced CDATA"))
   (T (= '`(chop "]]>") (tail 3 (made ) ) ) )



> I'll run & test your changed code and let you know.

Thanks! I include again a new version with the above fix.

Best regards,
- Alex
# 09aug08abu
# 09aug08 Tomas Hlavaty <[EMAIL PROTECTED]>

# Check or write the XML declaration
#   (xml? T) -> writes the declaration line to the current output channel
#   (xml?)   -> skips white space, reads up to the next ">" and returns
#               non-NIL if the text read starts with "<?xml"
(de xml? (Flg)
   (if Flg
      # Fix: an empty 'prinl' wrote only a blank line instead of a header
      (prinl "<?xml version=\"1.0\" encoding=\"utf-8\"?>")
      (skip)
      (prog1
         (head '("<" "?" "x" "m" "l") (till ">"))
         (char) ) ) )  # Consume the terminating ">"

# Generate/Parse XML data
# expects well formed XML
# encoding by picolisp (utf8 "only", no utf16 etc.)
# trim whitespace except in cdata
# ignore  ent
#
#   (xml Lst N) -> prints the element 'Lst' = (Tag Attributes . Content)
#                  to the current output channel, indented by 'N' spaces
#                  (default 0); 'Attributes' is a list of (Name . Value)
#                  pairs, nested elements in 'Content' are indented by 3
#   (xml)       -> parses from the current input channel via '_xml'
(de xml (Lst N)
   (if Lst
      (let Tag (pop 'Lst)
         (space (default N 0))
         (prin "<" Tag)
         (for X (pop 'Lst)
            (prin " " (car X) "=\"")
            (escXml (cdr X))
            (prin "\"") )
         (nond
            # No content at all: self-closing tag
            (Lst (prinl "/>"))
            # Exactly one atomic content item: keep it on the same line
            ((or (cdr Lst) (pair (car Lst)))
               (prin ">")
               (escXml (car Lst))
               # Fix: the element was never closed (an empty string was printed)
               (prinl "</" Tag ">") )
            # Several items or nested elements: one item per line
            (NIL
               (prinl ">")
               (for X Lst
                  (if (pair X)
                     (xml X (+ 3 N))
                     (space (+ 3 N))
                     (escXml X)
                     (prinl) ) )
               (space N)
               # Fix: write the closing tag instead of an empty line
               (prinl "</" Tag ">") ) ) )
      (_xml) ) )

# Parse one XML item from the current input channel
#   In   - when NIL, a "<?...?>" instruction, comment or DOCTYPE is followed
#          by parsing the next item; when non-NIL they are only consumed
#   Char - non-NIL if the caller has already read the leading "<"
# For an element a list starting with the interned tag name is built,
# followed by the attribute list (Name . Value) and the content items.
# For a CDATA section the packed text without the final "]]>" is built.
# Errors are signalled with 'quit'.
# NOTE(review): this copy looks damaged by the mail archive - several lines
# lack closing parentheses and some text seems to be missing; do not load it
# as is, verify against the attached "xml.l"
(de _xml (In Char)
   (unless Char
  (skip)
  (unless (= "<" (char))
 (quit "Bad XML") ) )
   (case (peek)
      # Processing instruction / XML declaration: skip to "?>"
  ("?"
 (from "?>")
 (unless In (_xml In)) )
  ("!"
 (char)
 (case (peek)
      # Comment: must start with "--", skip to "-->"
("-"
   (ifn (= "-" (char) (char))
  (quit "XML comment expected")
  (from "-->")
  (unless In (_xml In)) ) )
      # DOCTYPE: skip it, including an optional "[ ... ]" part
("D"
   (if (find '((C) (<> C (char))) '`(chop "DOCTYPE"))
  (quit "XML DOCTYPE expected")
  (when (= "[" (from "[" ">"))
 (use X
(loop
      # NOTE(review): the last 'from' argument is an empty string here;
      # presumably archive-stripped text - TODO confirm
   (T (= "]" (setq X (from "]" "\"" "'" "")) ) ) )
 (from ">") )
  (unless In (_xml In)) ) )
      # CDATA: collect characters until the collected list ends with "]]>"
("["
   (if (find '((C) (<> C (char))) '`(chop "[CDATA["))
  (quit "XML CDATA expected")
  (pack
 (head -3
(make
   (loop
      # 'char' returns NIL at end of input -> report instead of looping
  (NIL (link (char)) (quit "Unbalanced CDATA"))
  (T (= '`(chop "]]>") (tail 3 (made ) ) ) ) ) )
(T (quit "Unhandled XML tag")) ) )
      # Ordinary element: tag name, attributes, then content up to "</Tok>"
  (T
 (let Tok (till " ^I^M^J/>" T)
(use X
   (make
  (link (intern (pack Tok)))
  (let L
 (make
(loop
   (NIL (skip) (quit "Unexpected end of XML" Tok))
   (T (member @ '("/" ">")))
   (NIL (setq X (intern (pack (trim (till "="))
   (char)
   (skip)
      # Attribute value may be quoted with either " or '
   (let C (char)
  (unless (member C '("\"" "'"))
 (quit "XML attribute quote expected" X) )
  (link (cons X (pack (xmlEsc (till C) )
   (char) ) )
      # "/" here means an empty element "<Tok ... />"
 (if (= "/" (char))
(prog (char) (and L (link L)))
(link L)
(loop
   (NIL (skip) (quit "Unexpected end of XML" Tok))
   (T (and (= "<" (setq X (char))) (= "/" (peek)))
  (char)
  (unless (= Tok (till " ^I^M^J/>" T))
 (quit "Unbalanced XML" Tok) )
  (skip)
  (char) )
      # Nested item ("<" already consumed) or a run of text
   (if (= "<" X)
  (when (_xml T "<")
 (link @) )
  (link
 (pack (xmlEsc (trim (cons X (till "^M^J<") 
) ) ) ) ) ) ) ) ) ) )

(de xmlEsc (L)
   (use (@A @X @Z)
  (make
 (while L
(ifn (match '("&" @X ";" @Z) L)
   (link (pop 'L))
  

Re: problem reading XML

2008-08-08 Thread Tomas Hlavaty
Hi Alex,

> I'll run & test your changed code and let you know.

the updated file xml.l is attached.

Cheers,

Tomas



xml.l
Description: Binary data


Re: problem reading XML

2008-08-08 Thread Tomas Hlavaty
Hi Alex,

> 1. There was a slight error (still '_xml2' instead of '_xml')

Oops, sorry about that.

> 2. I replaced occurrences like
>
>   (ifn (= '`(chop "DOCTYPE")
>  (list (char) (char) (char) (char) (char) (char) (char)))
>
>with
>
>   (if (find '((C) (<> C (char))) '`(chop "DOCTYPE"))
>
>to avoid the excessive 'list'ing.

That's what I was looking for:-)

> 3. The case "??? echo to string?" is a bit cumbersome. There is in fact
>no proper equivalent of the 'echo' functionality in e.g. 'from'.
>Using 'pipe' is quite elegant and short, but I'm hesitating to use it
>is such a context.
>
>   (pipe (echo "]]>") (till NIL T))
>
>My proposal would be
>
>   (pack
>  (head -3
> (make
>(until (= '`(chop "]]>") (tail 3 (made)))
>   (link (char)) ) ) ) )
>
>This avoids the overhead of 'pipe', but is longer. Is this OK?

Yes, better solution without pipe.  I think we don't need prog1 and
(from "]]>") in that case.  It'll take me a while to understand it
though;-)

> BTW, why is the
>
>(from "]]>") ) ) )
>
> in the following line needed?

(from "]]>") in the original code

(prog1 # ??? echo to string?
   (pipe (echo "]]>") (till NIL T))
   (from "]]>"))

was necessary in the parent process because (echo "]]>") in the child
process did not seem to affect stdin of the parent process and all
those characters had to be read again in the parent process.  I am not
sure why it works this way.

>> : 3c64 6f63 3ef0 9080 80f4 8fbf bd3c 2f64  > 0010: 6f63 3e  oc>
>
> The first character after the '>' starts with "F0", which is not a legal
> UTF-8 sequence. UTF-8 has the following structure ("doc/utf8"):
>
>    0000 .. 007F   0xxxxxxx
>    0080 .. 07FF   110xxxxx 10xxxxxx
>    0800 .. FFFF   1110xxxx 10xxxxxx 10xxxxxx
>
> The pattern 11110xxx is thus not possible for any (not just the first)
> UTF-8 character.

Thanks for explanation, I'll ignore the invalid test cases then.

Sorry, I did not send you this, but

# Generate (when 'Lst' is given) or parse (when it is NIL) XML data
# This is the variant the message quotes as the one that should NOT be used:
# its parse branch passes the result of 'till' as first argument to '_xml';
# the corrected form follows after "should be just"
(de xml (Lst N)
   (if Lst
  (let Tag (pop 'Lst)
 (space (default N 0))
 (prin "<" Tag)
 (for X (pop 'Lst)
(prin " " (car X) "=\"")
(escXml (cdr X))
(prin "\"") )
 (nond
(Lst (prinl "/>"))
((or (cdr Lst) (pair (car Lst)))
   (prin ">")
   (escXml (car Lst))
      # NOTE(review): prints only a newline, no closing tag is written;
      # presumably the tag text was stripped by the archive - verify
   (prinl "") )
(NIL
   (prinl ">")
   (for X Lst
  (if (pair X)
 (xml X (+ 3 N))
 (space (+ 3 N))
 (escXml X)
 (prinl) ) )
   (space N)
      # NOTE(review): same as above, no closing tag is written here
   (prinl "") ) ) )
      # Erroneous call discussed in the message text
  (_xml (till " /<>" T)) ) )

should be just

# Generate/Parse XML data
#   (xml Lst N) -> prints the element 'Lst' = (Tag Attributes . Content)
#                  to the current output channel, indented by 'N' spaces
#                  (default 0); 'Attributes' is a list of (Name . Value)
#   (xml)       -> parses from the current input channel via '_xml'
(de xml (Lst N)
   (if Lst
      (let Tag (pop 'Lst)
         (space (default N 0))
         (prin "<" Tag)
         (for X (pop 'Lst)
            (prin " " (car X) "=\"")
            (escXml (cdr X))
            (prin "\"") )
         (nond
            # No content at all: self-closing tag
            (Lst (prinl "/>"))
            # Exactly one atomic content item: keep it on the same line
            ((or (cdr Lst) (pair (car Lst)))
               (prin ">")
               (escXml (car Lst))
               # Fix: the element was never closed (an empty string was printed)
               (prinl "</" Tag ">") )
            # Several items or nested elements: one item per line
            (NIL
               (prinl ">")
               (for X Lst
                  (if (pair X)
                     (xml X (+ 3 N))
                     (space (+ 3 N))
                     (escXml X)
                     (prinl) ) )
               (space N)
               # Fix: write the closing tag instead of an empty line
               (prinl "</" Tag ">") ) ) )
      (_xml) ) )

I'll run & test your changed code and let you know.

Cheers,

Tomas
-- 
UNSUBSCRIBE: mailto:[EMAIL PROTECTED]


Re: problem reading XML -- Attachment

2008-08-07 Thread Alexander Burger
On Fri, Aug 08, 2008 at 07:32:58AM +0200, Alexander Burger wrote:
> I did some minor changes, and attach a new version of "lib/xml.l" to

Stupid me! Forgot the attachment :-(
# 08aug08abu
# 08aug08 Tomas Hlavaty <[EMAIL PROTECTED]>

# Check or write the XML declaration
#   (xml? T) -> writes the declaration line to the current output channel
#   (xml?)   -> skips white space, reads up to the next ">" and returns
#               non-NIL if the text read starts with "<?xml"
(de xml? (Flg)
   (if Flg
      # Fix: an empty 'prinl' wrote only a blank line instead of a header
      (prinl "<?xml version=\"1.0\" encoding=\"utf-8\"?>")
      (skip)
      (prog1
         (head '("<" "?" "x" "m" "l") (till ">"))
         (char) ) ) )  # Consume the terminating ">"

# Generate/Parse XML data
# expects well formed XML
# encoding by picolisp (utf8 "only", no utf16 etc.)
# trim whitespace except in cdata
# ignore  ent
#
#   (xml Lst N) -> prints the element 'Lst' = (Tag Attributes . Content)
#                  to the current output channel, indented by 'N' spaces
#                  (default 0); 'Attributes' is a list of (Name . Value)
#   (xml)       -> parses from the current input channel via '_xml'
(de xml (Lst N)
   (if Lst
      (let Tag (pop 'Lst)
         (space (default N 0))
         (prin "<" Tag)
         (for X (pop 'Lst)
            (prin " " (car X) "=\"")
            (escXml (cdr X))
            (prin "\"") )
         (nond
            # No content at all: self-closing tag
            (Lst (prinl "/>"))
            # Exactly one atomic content item: keep it on the same line
            ((or (cdr Lst) (pair (car Lst)))
               (prin ">")
               (escXml (car Lst))
               # Fix: the element was never closed (an empty string was printed)
               (prinl "</" Tag ">") )
            # Several items or nested elements: one item per line
            (NIL
               (prinl ">")
               (for X Lst
                  (if (pair X)
                     (xml X (+ 3 N))
                     (space (+ 3 N))
                     (escXml X)
                     (prinl) ) )
               (space N)
               # Fix: write the closing tag instead of an empty line
               (prinl "</" Tag ">") ) ) )
      # Fix: '_xml' takes (In Char) and reads the input itself; pre-reading
      # with 'till' consumed leading characters and passed them as 'In'
      (_xml) ) )

# Parse one XML item from the current input channel
#   In   - when NIL, a "<?...?>" instruction, comment or DOCTYPE is followed
#          by parsing the next item; when non-NIL they are only consumed
#   Char - non-NIL if the caller has already read the leading "<"
# For an element a list starting with the interned tag name is built,
# followed by the attribute list (Name . Value) and the content items.
# For a CDATA section the packed text without the final "]]>" is built.
# Errors are signalled with 'quit'.
# NOTE(review): this copy looks damaged by the mail archive - several lines
# lack closing parentheses and some text seems to be missing; do not load it
# as is, verify against the attached "xml.l"
(de _xml (In Char)
   (unless Char
  (skip)
  (unless (= "<" (char))
 (quit "Bad XML") ) )
   (case (peek)
      # Processing instruction / XML declaration: skip to "?>"
  ("?"
 (from "?>")
 (unless In (_xml In)) )
  ("!"
 (char)
 (case (peek)
      # Comment: must start with "--", skip to "-->"
("-"
   (ifn (= "-" (char) (char))
  (quit "XML comment expected")
  (from "-->")
  (unless In (_xml In)) ) )
      # DOCTYPE: skip it, including an optional "[ ... ]" part
("D"
   (if (find '((C) (<> C (char))) '`(chop "DOCTYPE"))
  (quit "XML DOCTYPE expected")
  (when (= "[" (from "[" ">"))
 (use X
(loop
      # NOTE(review): the last 'from' argument is an empty string here;
      # presumably archive-stripped text - TODO confirm
   (T (= "]" (setq X (from "]" "\"" "'" "")) ) ) )
 (from ">") )
  (unless In (_xml In)) ) )
      # CDATA: collect characters until the collected list ends with "]]>"
("["
   (if (find '((C) (<> C (char))) '`(chop "[CDATA["))
  (quit "XML CDATA expected")
  (prog1
 (pack
(head -3
   (make
      # NOTE(review): no end-of-input check; the thread reports an endless
      # loop here when the terminating "]]>" is missing
  (until (= '`(chop "]]>") (tail 3 (made)))
 (link (char)) ) ) ) )
      # NOTE(review): "]]>" has already been read by the loop above, so this
      # searches for a further "]]>"; the thread questions this call
 (from "]]>") ) ) )
(T (quit "Unhandled XML tag")) ) )
      # Ordinary element: tag name, attributes, then content up to "</Tok>"
  (T
 (let Tok (till " ^I^M^J/>" T)
(use X
   (make
  (link (intern (pack Tok)))
  (let L
 (make
(loop
   (NIL (skip) (quit "Unexpected end of XML" Tok))
   (T (member @ '("/" ">")))
   (NIL (setq X (intern (pack (trim (till "="))
   (char)
   (skip)
      # Attribute value may be quoted with either " or '
   (let C (char)
  (unless (member C '("\"" "'"))
 (quit "XML attribute quote expected" X) )
  (link (cons X (pack (xmlEsc (till C) )
   (char) ) )
      # "/" here means an empty element "<Tok ... />"
 (if (= "/" (char))
(prog (char) (and L (link L)))
(link L)
(loop
   (NIL (skip) (quit "Unexpected end of XML" Tok))
   (T (and (= "<" (setq X (char))) (= "/" (peek)))
  (char)
  (unless (= Tok (till " ^I^M^J/>" T))
 (quit "Unbalanced XML" Tok) )
  (skip)
  (char) )
      # Nested item ("<" already consumed) or a run of text
   (if (= "<" X)
  (when (_xml T "<")
 (link @) )
  (link
 (pack (xmlEsc (trim (cons X (till "^M^J<") 
) ) ) ) ) ) ) ) ) ) )

# Resolve XML entity references in a list of characters
#   L - list of single characters (as returned by 'till')
# Returns a new list where every "&name;" run is replaced by one item:
#   - the character for the entities quot, amp, lt, gt and apos
#   - the character with the given code for "&#N;" and "&#xN;"
#   - for any other name, the list of the name's characters
# All other characters are copied unchanged.
(de xmlEsc (L)
   (use (@X @Z)  # The formerly declared '@A' was never used by the pattern
      (make
         (while L
            (ifn (match '("&" @X ";" @Z) L)
               (link (pop 'L))
               (link
                  (cond
                     ((= @X '`(chop "quot")) "\"")
                     ((= @X '`(chop "amp")) "&")
                     ((= @X '`(chop "lt")) "<")
                     ((= @X '`(chop "gt")) ">")
                     ((= @X '`(chop "apos")) "'")
                     ((= "#" (car @X))
                        (char
                           (if (= "x" (cadr @X))
                              (hex (cddr @X))
                              (format (pack (cdr @X))) ) ) )
                     (T @X) ) )
               # Continue behind the ";" that ended the reference
               (setq L @Z) ) ) ) ) )

(de escXml (X)
   (for C (chop X)
  (if (member C '`(chop "\"&<"))
 (prin "&#" (char C) ";")
 (pr

Re: problem reading XML

2008-08-07 Thread Alexander Burger
Hi Tomas,

many thanks!

I did some minor changes, and attach a new version of "lib/xml.l" to
this mail.

1. There was a slight error (still '_xml2' instead of '_xml')


2. I replaced occurrences like

  (ifn (= '`(chop "DOCTYPE")
 (list (char) (char) (char) (char) (char) (char) (char)))

   with

  (if (find '((C) (<> C (char))) '`(chop "DOCTYPE"))

   to avoid the excessive 'list'ing.


3. The case "??? echo to string?" is a bit cumbersome. There is in fact
   no proper equivalent of the 'echo' functionality in e.g. 'from'.
   Using 'pipe' is quite elegant and short, but I'm hesitating to use it
   in such a context.

  (pipe (echo "]]>") (till NIL T))

   My proposal would be

  (pack
 (head -3
(make
   (until (= '`(chop "]]>") (tail 3 (made)))
  (link (char)) ) ) ) )

   This avoids the overhead of 'pipe', but is longer. Is this OK?


BTW, why is the

   (from "]]>") ) ) )

in the following line needed?



> : 3c64 6f63 3ef0 9080 80f4 8fbf bd3c 2f64   0010: 6f63 3e  oc>

The first character after the '>' starts with "F0", which is not a legal
UTF-8 sequence. UTF-8 has the following structure ("doc/utf8"):

   0000 .. 007F   0xxxxxxx
   0080 .. 07FF   110xxxxx 10xxxxxx
   0800 .. FFFF   1110xxxx 10xxxxxx 10xxxxxx

The pattern 11110xxx is thus not possible for any (not just the first)
UTF-8 character.

Cheers,
- Alex
-- 
UNSUBSCRIBE: mailto:[EMAIL PROTECTED]


Re: problem reading XML

2008-08-06 Thread Tomas Hlavaty
Hi Alex,

> So please take your time, and send it to me when ready.

here is the XML parser:

# expects well formed XML
# encoding by picolisp (utf8 "only", no utf16 etc.)
# trim whitespace except in cdata
# ignore  ent
(de _xml (In Char)
   (unless Char
  (skip)
  (unless (= "<" (char))
 (quit "Bad XML") ) )
   (case (peek)
  ("?"
   (from "?>")
   (unless In (_xml2 In)))
  ("!"
   (char)
   (case (peek)
  ("-"
   (ifn (= '`(chop "--") (list (char) (char)))
  (quit "XML comment expected")
  (from "-->")
  (unless In (_xml2 In
  ("D"
   (ifn (= '`(chop "DOCTYPE")
 (list (char) (char) (char) (char) (char) (char) (char)))
  (quit "XML DOCTYPE expected")
  (when (= "[" (from "[" ">"))
 (use X
(loop
   (T (= "]" (setq X (from "]" "\"" "'" "")
 (from ">"))
  (unless In (_xml2 In
  ("["
   (ifn (= '`(chop "[CDATA[")
 (list (char) (char) (char) (char) (char) (char) (char)))
  (quit "XML CDATA expected")
  (prog1 # ??? echo to string?
 (pipe (echo "]]>") (till NIL T))
 (from "]]>"
  (T (quit "Unhandled XML tag"
  (T
   (let Tok (till " ^I^M^J/>" T)
  (use X
 (make
(link (intern (pack Tok)))
(let L
   (make
  (loop
 (NIL (skip) (quit "Unexpected end of XML" Tok))
 (T (member @ '("/" ">")))
 (NIL (setq X (intern (pack (trim (till "="))
 (char)
 (skip)
 (let C (char)
(unless (member C '("\"" "'"))
   (quit "XML attribute quote expected" X) )
(link (cons X (pack (xmlEsc (till C))
 (char) ) )
   (if (= "/" (char))
  (prog (char) (and L (link L)))
  (link L)
  (loop
 (NIL (skip) (quit "Unexpected end of XML" Tok))
 (T (and (= "<" (setq X (char))) (= "/" (peek)))
(char)
(unless (= Tok (till " ^I^M^J/>" T))
   (quit "Unbalanced XML" Tok) )
(skip)
(char) )
 (if (= "<" X)
(when (_xml2 T "<")
   (link @))
(link
   (pack (xmlEsc (trim (cons X (till 
"^M^J<"

_xml should be called from xml function as (_xml), like the previous
_xml2 code.

I found a few XML files where the above code fails but this is due to:

1) utf-8 byte order mark is not understood.  The UTF-8 representation
   of the BOM is the byte sequence EF BB BF.  I am not sure how others
   handle this but it should not be part of the parser probably.

2) some "unusual" utf-8 characters.  Not sure why I get these
   failures.  However, this is quite a minor problem and might be a bug
   in the picolisp utf-8 handling code?

   Example file:

: 3c64 6f63 3ef0 9080 80f4 8fbf bd3c 2f64  

   Any ideas?

Also, I think that a better way of checking whether a file is an XML
file is:

# Validate the file 'F' by running the external "xmlstarlet val" command
# through "sh -c", with its output and error streams sent to /dev/null.
# Returns the result of 'call'.
# NOTE(review): 'F' is inserted into the shell command line without any
# quoting - only pass trusted file names
(de xml! (F)
   (let Cmd (pack "xmlstarlet val -w -q " F " 1>/dev/null 2>/dev/null")
      (call "sh" "-c" Cmd) ) )

or something like that, using a specialized validation program.

Another check might be encoding check like:

(in (list "enca" "-L" "none" F)
   (not (from "UCS-2" "Unrecognized" "non-text")))

I hope people will find this useful.

Thanks,

Tomas
-- 
UNSUBSCRIBE: mailto:[EMAIL PROTECTED]


Re: problem reading XML

2008-07-29 Thread Alexander Burger
Hi Tomas,

> Would not it be better to have only one function to parse xml in
> picolisp distribution?  I will have to refine it and test it more
> thouroughly though.

OK, then let's do it that way. I intended to keep two versions, because
I thought it nice to have a lightweight XML (completely without comments
then), too. But this might get confusing.

So please take your time, and send it to me when ready.


> > b="2">
> ...
> It is not a valid xml:

Didn't know that. I thought that a comment is equivalent to white space
as in other languages.


> Filtering comments by a preprocessor would not work in general
> ...
> 14

I see your point.

So incorporating comments into the general parser is a good idea.

Cheers,
- Alex
-- 
UNSUBSCRIBE: mailto:[EMAIL PROTECTED]


Re: problem reading XML

2008-07-28 Thread Tomas Hlavaty
Hi Alex,

> I would like to put it into a file "lib/xml2.l" for the next
> release, crediting you as the author (name and email). Do I have
> your permission?

Sure, that would be an honour!

> about what goes into the basic release, to protect it from blowing
> up too much (and thus being no longer "pico").

Would not it be better to have only one function to parse xml in
picolisp distribution?  I will have to refine it and test it more
thoroughly though.

> One problem, though, I still see with comments. As they are parsed
> on the tag level, they won't work if they are nested somewhere else,
> e.g.
>
> b="2">
>   123
>

It is not a valid xml:

XML Parsing Error: not well-formed
Location: file:///tmp/a.xml
Line Number 1, Column 11:
 b="2">123
--^

A comment can be inside an element:

1234

xml2 returns (hi ((a . "1") (b . "2")) "12" "34")

Filtering comments by a preprocessor would not work in general
unfortunately because something like:

14

is a valid xml and the comment is not a comment but an ordinary
text:-(

Thanks,

Tomas
-- 
UNSUBSCRIBE: mailto:[EMAIL PROTECTED]


Re: problem reading XML

2008-07-28 Thread Alexander Burger
Hi Tomas,

> since the XML declaration is optional, I would need to write:
> 
> (or (in F (pipe (while (and (echo "")))
>  (and (xml?) (xml
> (in F (pipe (while (and (echo "")))
>  (xml

Yes, this is quite inconvenient.
I didn't know that the header is optional.


> Would not it be better if the xml function simply handled it all, the
> xml declaration, comments and xml elements?
> ...
> The following code handles optional declarations and comments

Great, that's a nice solution!

I would like to put it into a file "lib/xml2.l" for the next release,
crediting you as the author (name and email). Do I have your permission?


One problem, though, I still see with comments. As they are parsed on
the tag level, they won't work if they are nested somewhere else, e.g.

b="2">
  123
   


> > Is this mailing list the right place to decide such changes?
> How does current decision making work?

Good question. There's no fixed procedure, but I'd like to keep control
about what goes into the basic release, to protect it from blowing up
too much (and thus being no longer "pico").

As there is no other official forum for PicoLisp, it might be acceptable
if we consider this mailing list as the authoritative platform.

Cheers,
- Alex
-- 
UNSUBSCRIBE: mailto:[EMAIL PROTECTED]


Re: problem reading XML

2008-07-27 Thread Tomas Hlavaty
Hi Alex,

>(in "file
>   (pipe (while (and (echo "")))
>  (xml) ) )
>
> I fact, I would like to remove that half-hearted comment feature from
> "lib/xml.l". Does anybody have objections?

since the XML declaration is optional, I would need to write:

(or (in F (pipe (while (and (echo "")))
 (and (xml?) (xml
(in F (pipe (while (and (echo "")))
 (xml

to load xml data.

Would not it be better if the xml function simply handled it all, the
xml declaration, comments and xml elements?  It looks like the current
code is not far from it.

The following code handles optional declarations and comments (both
inside and outside the root element):

# Generate/Parse XML data (variant using the '_xml2' parser)
#   (xml2 Lst N) -> prints the element 'Lst' = (Tag Attributes . Content)
#                   to the current output channel, indented by 'N' spaces
#                   (default 0); 'Attributes' is a list of (Name . Value)
#   (xml2)       -> parses from the current input channel via '_xml2'
(de xml2 (Lst N)
   (if Lst
      (let Tag (pop 'Lst)
         (space (default N 0))
         (prin "<" Tag)
         (for X (pop 'Lst)
            (prin " " (car X) "=\"")
            (escXml (cdr X))
            (prin "\"") )
         (nond
            # No content at all: self-closing tag
            (Lst (prinl "/>"))
            # Exactly one atomic content item: keep it on the same line
            ((or (cdr Lst) (pair (car Lst)))
               (prin ">")
               (escXml (car Lst))
               # Fix: the element was never closed (an empty string was printed)
               (prinl "</" Tag ">") )
            # Several items or nested elements: one item per line
            (NIL
               (prinl ">")
               (for X Lst
                  (if (pair X)
                     # Fix: recurse into this function instead of 'xml', so
                     # the definition does not depend on the older one
                     (xml2 X (+ 3 N))
                     (space (+ 3 N))
                     (escXml X)
                     (prinl) ) )
               (space N)
               # Fix: write the closing tag instead of an empty line
               (prinl "</" Tag ">") ) ) )
      (_xml2) ) )

(de _xml2 (Tok Decl In)
   #(println Tok)
   (cond
  ((not Tok)
   (skip)
   (unless (= "<" (char))
  (quit "Bad XML start") )
   (_xml2 (till " /<>")))
  ((head '("?" "x" "m" "l") Tok)
   (if Decl
  (quit "XML declaration too late")
  # TODO check decl validity
  (from "?>")
  (skip)
  (unless (= "<" (char))
 (quit "Bad XML element start") )
  (_xml2 (till " /<>") T)))
  ((head '("!" "-" "-") Tok)
   (from "-->")
   (unless In
  (skip)
  (unless (= "<" (char))
 (quit "Bad XML element start") )
  (_xml2 (till " /<>") T)))
  (T
   (use X
  (make
 (link (intern (pack Tok)))
 (let L
(make
   (loop
  (NIL (skip) (quit "XML parse error 1"))
  (T (member @ '("/" ">")))
  (NIL (setq X (intern (till "=" T
  (char)
  (unless (= "\"" (char))
 (quit "XML parse error 2" X) )
  (link (cons X (pack (xmlEsc (till "\"")
  (char) ) )
(if (= "/" (char))
   (prog (char) (and L (link L)))
   (link L)
   (loop
  (NIL (skip) (quit "XML parse error 3" (pack Tok)))
  (T (and (= "<" (setq X (char))) (= "/" (peek)))
 (char)
 (unless (= Tok (till " /<>"))
(quit "Unbalanced XML" (pack Tok) ))
 (char) )
  (if (= "<" X)
 (and (_xml2 (till " /<>") T T) (link @))
 (link
(pack (xmlEsc (trim (cons X (till 
"^J<")))

It can parse the following file:




   
   123




> Is this mailing list the right place to decide such changes?

How does current decision making work?

Thanks,

Tomas
-- 
UNSUBSCRIBE: mailto:[EMAIL PROTECTED]


Re: problem reading XML

2008-07-26 Thread Alexander Burger
On Sat, Jul 26, 2008 at 10:05:05AM +0200, Alexander Burger wrote:
> Handling comments in a parser is usually not a good idea anyway, and is
> better done by a separate preprocessor.

I would recommend something like

   (in "file
  (pipe (while (and (echo "")))
 (xml) ) )

In fact, I would like to remove that half-hearted comment feature from
"lib/xml.l". Does anybody have objections? It might break something. Is
this mailing list the right place to decide such changes?

Cheers,
- Alex
-- 
UNSUBSCRIBE: mailto:[EMAIL PROTECTED]


Re: problem reading XML

2008-07-26 Thread Alexander Burger
Hi Tomas,

> It is legal to have comments before the root element (case b.xml).

You are right. Comment handling is buggy here.

Initially, the XML parser did not support comments at all, and they were
introduced later rather listlessly. Just for completeness, because I
never needed them. (This is not an excuse, of course ;-)

Handling comments in a parser is usually not a good idea anyway, and is
better done by a separate preprocessor. The '_xml' function handles them
on the tag level, causing a comment to return an empty sublist. And this
gives unexpected results if the comment is the first tag.

Cheers,
- Alex
-- 
UNSUBSCRIBE: mailto:[EMAIL PROTECTED]


Re: problem reading XML

2008-07-25 Thread Tomas Hlavaty
Hi Henrik & Alex,

thanks for your suggestions but I think that it works for you only
because your XML data satisfy some particular assumptions:

Henrik assumes that the ?xml declaration is present and is at the
beginning of the file.

>> (setq Lst (in "atom.xml" (and (xml?) (xml

Alex also assumes that the first line is junk or comment.  However,
the whole XML can have one line only, e.g.

123

or

123

> there might still be a problem due to the 'comment' line. Is it
> needed (or even legal)? If so, you could skip that line:

>(setq Lst (in "file" (line) (and (xml?) (xml

It is legal to have comments before the root element (case b.xml).

However, it is not legal to have a comment before the declaration, as I
found out now (see case e.xml below).

I think that it should be possible to parse all these cases (files
a.xml to d.xml) without knowing which one will "arrive" upfront.  I
guess that the xml function does not handle comments properly,
otherwise it would at least be possible to write:

(or (in F (and (xml?) (xml)))
(in F (xml)))

The files a.xml to d.xml:

==> /tmp/a.xml <==
123

==> /tmp/b.xml <==

123

==> /tmp/c.xml <==

123

==> /tmp/d.xml <==


123

The file e.xml is actually invalid:

==> /tmp/e.xml <==


123

XML Parsing Error: XML or text declaration not at start of entity
Location: file:///tmp/e.xml
Line Number 2, Column 1:

^

Thanks,

Tomas
-- 
UNSUBSCRIBE: mailto:[EMAIL PROTECTED]


Re: problem reading XML

2008-07-24 Thread Alexander Burger
On Fri, Jul 25, 2008 at 09:08:50AM +0700, Henrik Sarvell wrote:
> I've had this discussion with Alex already, I ended up with this after it:
> (setq Lst (in "atom.xml" (and (xml?) (xml

That's correct. However, in Tomas's case:

   > 
   > 

there might still be a problem due to the 'comment' line. Is it needed
(or even legal)? If so, you could skip that line:

   (setq Lst (in "file" (line) (and (xml?) (xml

Cheers,
- Alex
-- 
UNSUBSCRIBE: mailto:[EMAIL PROTECTED]


Re: problem reading XML

2008-07-24 Thread Henrik Sarvell
I've had this discussion with Alex already, I ended up with this after it:
(setq Lst (in "atom.xml" (and (xml?) (xml

It will parse the whole thing into a list.

/Henrik

On Fri, Jul 25, 2008 at 6:15 AM, Tomas Hlavaty <[EMAIL PROTECTED]> wrote:

> Hi Alex,
>
> what is the way to load an XML file?  I seem to be having problems
> with some cases depending on where the ?xml declaration and comments
> are:
>
> : (load '@lib/xml.l)
> -> attr
> : (let F '/tmp/a.xml (call 'cat F) (in F (xml)))
> 123
> -> (hi NIL "123")
> : (let F '/tmp/b.xml (call 'cat F) (in F (xml)))
> 
> 123
> -> NIL
> : (let F '/tmp/c.xml (call 'cat F) (in F (xml)))
> 
> 123
> [/tmp/c.xml:1] ?>\
> 123\
>  -- XML parse error
> ?
> : (let F '/tmp/d.xml (call 'cat F) (in F (xml)))
> 
> 
> 123
> [/tmp/d.xml:1] ?>\
> \
> 123\
>  -- XML parse error
> ?
> : (let F '/tmp/e.xml (call 'cat F) (in F (xml)))
> 
> 
> 123
> -> NIL
>
> I am using picolisp-2.3.2.
>
> Thanks,
>
> Tomas
> --
> UNSUBSCRIBE: mailto:[EMAIL PROTECTED]
>

--=_Part_6557_33419765.1216951730383
Content-Type: text/html; charset=ISO-8859-1
Content-Transfer-Encoding: 7bit
Content-Disposition: inline

I've had this discussion with Alex already, I ended up with 
this after it: (setq Lst (in "atom.xml" (and (xml?) 
(xmlIt will parse the whole thing into a 
list./Henrik
On Fri, Jul 25, 2008 at 6:15 AM, Tomas Hlavaty [EMAIL PROTECTED]> wrote:
Hi Alex,

what is the way to load an XML file?  I seem to be having problems
with some cases depending on where the ?xml declaration and comments
are:

: (load '@lib/xml.l)
-> attr
: (let F '/tmp/a.xml (call 'cat F) (in F (xml)))
123
-> (hi NIL "123")
: (let F '/tmp/b.xml (call 'cat F) (in F (xml)))

123
-> NIL
: (let F '/tmp/c.xml (call 'cat F) (in F (xml)))

123
[/tmp/c.xml:1] ?>\
123\
 -- XML parse error
?
: (let F '/tmp/d.xml (call 'cat F) (in F (xml)))


123
[/tmp/d.xml:1] ?>\
\
123\
 -- XML parse error
?
: (let F '/tmp/e.xml (call 'cat F) (in F (xml)))


123
-> NIL

I am using picolisp-2.3.2.

Thanks,

Tomas
--
UNSUBSCRIBE: mailto:mailto:picolisp@software-lab.de";>picolisp@software-lab.de?subject=Unsubscribe


--=_Part_6557_33419765.1216951730383--
-- 
UNSUBSCRIBE: mailto:[EMAIL PROTECTED]
---


problem reading XML

2008-07-24 Thread Tomas Hlavaty
Hi Alex,

what is the way to load an XML file?  I seem to be having problems
with some cases depending on where the ?xml declaration and comments
are:

: (load '@lib/xml.l)
-> attr
: (let F '/tmp/a.xml (call 'cat F) (in F (xml)))
123
-> (hi NIL "123")
: (let F '/tmp/b.xml (call 'cat F) (in F (xml)))

123
-> NIL
: (let F '/tmp/c.xml (call 'cat F) (in F (xml)))

123
[/tmp/c.xml:1] ?>\
123\
 -- XML parse error
? 
: (let F '/tmp/d.xml (call 'cat F) (in F (xml)))


123
[/tmp/d.xml:1] ?>\
\
123\
 -- XML parse error
? 
: (let F '/tmp/e.xml (call 'cat F) (in F (xml)))


123
-> NIL

I am using picolisp-2.3.2.

Thanks,

Tomas
-- 
UNSUBSCRIBE: mailto:[EMAIL PROTECTED]