Package: bibtex2html Version: 1.91-1 Severity: minor
Hi, while using bib2bib I discovered that the õ character was not handled, so I added it to latex_accents.mll. I also made the following changes to that file: - I added other Latin-1 diacritics (Ç, Ã, etc.). - I removed the "\\I" "letters": to my knowledge only \i exists, and its purpose is to remove the dot above the "i". There is no need for a \I, as a capital "I" already lacks this dot. - I added "\\i}" because the lexer was not able to handle entries like: author = {Col{\"\i}n}, for instance. The first "{" is consumed by next_char, but once "\\"" has been lexed, quote_char does not know about "\\i}", hence my addition. - I also added the "{I}" char. I hope I did not misinterpret the inner workings of latex_accents.mll; see the attached diff. On a related note, I also discovered that fields like: author = {Tr{\" e}ma and Cl{\' e}s}, were not correctly matched by a regex condition. One of the causes seems to be that latex_accents.mll does not take inner spaces into account. Other experiments also seem to point to something in condition_lexer and/or bibtex_lexer, although I'm far from sure. I got very confused between the OCaml escaping of characters, the escaping I had to do in my shell, the escaping in the regex, and all the lexers, so I will not attempt to touch them and will trust upstream here :-) -- System Information: Debian Release: lenny/sid APT prefers testing APT policy: (500, 'testing') Architecture: i386 (i686) Kernel: Linux 2.6.22 Locale: LANG=fr_FR.UTF-8, LC_CTYPE=fr_FR.UTF-8 (charmap=UTF-8) Shell: /bin/sh linked to /bin/bash Versions of packages bibtex2html depends on: ii ocaml-base-nox [ocaml-base-no 3.10.0-13 Runtime system for ocaml bytecode ii perl 5.8.8-12 Larry Wall's Practical Extraction ii texlive-base 2007-13 TeX Live: Essential programs and f bibtex2html recommends no packages. -- no debconf information
--- latex_accents.mll.backup 2008-02-22 19:09:59.000000000 +0100 +++ latex_accents.mll 2008-02-22 20:03:46.000000000 +0100 @@ -37,7 +37,13 @@ | '{' { next_char lexbuf } | '}' { next_char lexbuf } | 'ç' { add_string "ç" ; next_char lexbuf } + | 'Ç' { add_string "Ç" ; next_char lexbuf } | 'ñ' { add_string "ñ"; next_char lexbuf } + | 'Ñ' { add_string "Ñ"; next_char lexbuf } + | 'ã' { add_string "ã"; next_char lexbuf } + | 'Ã' { add_string "Ã"; next_char lexbuf } + | 'õ' { add_string "õ"; next_char lexbuf } + | 'Õ' { add_string "Õ"; next_char lexbuf } | 'ä' { add_string "ä"; next_char lexbuf } | 'ö' { add_string "ö"; next_char lexbuf } | 'ü' { add_string "ü"; next_char lexbuf } @@ -90,25 +96,27 @@ | '`' { left_accent lexbuf } | '^' { hat lexbuf } | "c{c}" { add_string "ç" ; next_char lexbuf } +| "c{C}" { add_string "Ç" ; next_char lexbuf } | 'v' { czech lexbuf } -| ("~n"|"~{n}") { add_string "ñ"; next_char lexbuf } +| '~' { tilde lexbuf } | _ { add_string "\\" ; add lexbuf ; next_char lexbuf } | eof { add_string "\\" } (* called when we have seen "\\\"" *) and quote_char = parse - ('a'|"{a}") { add_string "ä" ; next_char lexbuf } -| ('o'|"{o}") { add_string "ö" ; next_char lexbuf } -| ('u'|"{u}") { add_string "ü" ; next_char lexbuf } -| ('e'|"{e}") { add_string "ë" ; next_char lexbuf } -| ('A'|"{A}") { add_string "Ä" ; next_char lexbuf } -| ('O'|"{O}") { add_string "Ö" ; next_char lexbuf } -| ('U'|"{U}") { add_string "Ü" ; next_char lexbuf } -| ('E'|"{E}") { add_string "Ë" ; next_char lexbuf } -| ("\\i" space+|"{\\i}") { add_string "ï" ; next_char lexbuf } -| ('I'|"\\I" space+|"{\\I}") { add_string "Ï" ; next_char lexbuf } -| _ { add_string "\\\"" ; add lexbuf } -| eof { add_string "\\\"" } + ('a'|"{a}") { add_string "ä" ; next_char lexbuf } +| ('o'|"{o}") { add_string "ö" ; next_char lexbuf } +| ('u'|"{u}") { add_string "ü" ; next_char lexbuf } +| ('e'|"{e}") { add_string "ë" ; next_char lexbuf } +| ('A'|"{A}") { add_string "Ä" ; next_char lexbuf } +| ('O'|"{O}") { 
add_string "Ö" ; next_char lexbuf } +| ('U'|"{U}") { add_string "Ü" ; next_char lexbuf } +| ('E'|"{E}") { add_string "Ë" ; next_char lexbuf } +| ('i'|"{i}"|"\\i" space+|"{\\i}"|"\\i}") + { add_string "ï" ; next_char lexbuf } +| ('I'|"{I}") { add_string "Ï" ; next_char lexbuf } +| _ { add_string "\\\"" ; add lexbuf } +| eof { add_string "\\\"" } (* called when we have seen "\\'" *) and right_accent = parse @@ -120,9 +128,10 @@ | ('O'|"{O}") { add_string "Ó" ; next_char lexbuf } | ('U'|"{U}") { add_string "Ú" ; next_char lexbuf } | ('E'|"{E}") { add_string "É" ; next_char lexbuf } -| ('\'') { add_string "”" ; next_char lexbuf } -| ('i'|"\\i" space+|"{\\i}") { add_string "í" ; next_char lexbuf } -| ('I'|"\\I" space+|"{\\I}") { add_string "Í" ; next_char lexbuf } +| ('\'') { add_string "”" ; next_char lexbuf } +| ('i'|"{i}"|"\\i" space+|"{\\i}"|"\\i}") + { add_string "í" ; next_char lexbuf } +| ('I'|"{I}") { add_string "Í" ; next_char lexbuf } | _ { add_string "\\'" ; add lexbuf ; next_char lexbuf } | eof { add_string "\\'" } @@ -136,12 +145,14 @@ | ('O'|"{O}") { add_string "Ò" ; next_char lexbuf } | ('U'|"{U}") { add_string "Ù" ; next_char lexbuf } | ('E'|"{E}") { add_string "È" ; next_char lexbuf } -| ('`') { add_string "“" ; next_char lexbuf } -| ('i'|"\\i" space+ |"{\\i}") { add_string "ì" ; next_char lexbuf } -| ('I'|"\\I" space+ |"{\\I}") { add_string "Ì" ; next_char lexbuf } +| ('`') { add_string "“" ; next_char lexbuf } +| ('i'|"{i}"|"\\i" space+ |"{\\i}"|"\\i}") + { add_string "ì" ; next_char lexbuf } +| ('I'|"{I}") { add_string "Ì" ; next_char lexbuf } | _ { add_string "\\`" ; add lexbuf ; next_char lexbuf } | eof { add_string "\\`" } +(* called when we have seen "\\^" *) and hat = parse ('a'|"{a}") { add_string "â" ; next_char lexbuf } | ('o'|"{o}") { add_string "ô" ; next_char lexbuf } @@ -151,18 +162,32 @@ | ('O'|"{O}") { add_string "Ô" ; next_char lexbuf } | ('U'|"{U}") { add_string "Û" ; next_char lexbuf } | ('E'|"{E}") { add_string "Ê" ; next_char 
lexbuf } -| ('i'|"\\i" space+ |"{\\i}") { add_string "î" ; next_char lexbuf } -| ('I'|"\\I" space+ |"{\\I}") { add_string "Î" ; next_char lexbuf } +| ('i'|"{i}"|"\\i" space+ |"{\\i}"|"\\i}") + { add_string "î" ; next_char lexbuf } +| ('I'|"{I}") { add_string "Î" ; next_char lexbuf } | _ { add_string "\\^" ; add lexbuf ; next_char lexbuf } | eof { add_string "\\^" } +(* called when we have seen "\\~" *) +and tilde = parse + ('a'|"{a}") { add_string "ã" ; next_char lexbuf } +| ('o'|"{o}") { add_string "õ" ; next_char lexbuf } +| ('A'|"{A}") { add_string "Ã" ; next_char lexbuf } +| ('O'|"{O}") { add_string "Õ" ; next_char lexbuf } +| ('n'|"{n}") { add_string "ñ" ; next_char lexbuf } +| ('N'|"{N}") { add_string "Ñ" ; next_char lexbuf } +| _ { add_string "\\~" ; add lexbuf ; next_char lexbuf } +| eof { add_string "\\~" } + +(* called when we have seen "\\v" *) and czech = parse ('r'|"{r}") { add_string "ř" ; next_char lexbuf } | ('R'|"{R}") { add_string "Ř" ; next_char lexbuf } | ('s'|"{s}") { add_string "š" ; next_char lexbuf } | ('S'|"{S}") { add_string "Š" ; next_char lexbuf } -| ('i'|"\\i" space+ |"{\\i}") { add_string "ĭ" ; next_char lexbuf } -| ('I'|"\\I" space+ |"{\\I}") { add_string "Ĭ" ; next_char lexbuf } +| ('i'|"{i}"|"\\i" space+ |"{\\i}"|"\\i}") + { add_string "ĭ" ; next_char lexbuf } +| ('I'|"{I}") { add_string "Ĭ" ; next_char lexbuf } | _ { add_string "\\^" ; add lexbuf ; next_char lexbuf } | eof { add_string "\\^" }