At least I can provide you with a BibTeX parser written using the Utrecht Parser Combinators. As you will see from the text, the BibTeX format is actually quite intricate.

 Doaitse Swierstra



-- $Header: /data/cvs-rep/uust/examples/bibtex-parser/BibtexParser.hs,v 1.8 2002/11/08 12:48:00 uust Exp $
-- $Name: $ (version name)
{- Fast, Error Correcting Parser Combinators; Version: see Version History in same directory.
- Copyright: S. Doaitse Swierstra
Department of Computer Science
Utrecht University
P.O. Box 80.089
3508 TB UTRECHT
the Netherlands
[EMAIL PROTECTED]
-}


{- file: bibtex6.hs
   A parser for BibTeX
   using the UU parsing combinators
   Piet van Oostrum, Atze Dijkstra, Doaitse Swierstra (April 22, 2001)
-}
module Main where
import UU.Parsing
import UU.Parsing.CharParser
import UU.Scanner.Position
import System
import Char

-- Render one parser message as human-readable text.
--
-- The result names the input position and then, depending on the repair
-- action, says what was expected and which symbol was inserted or deleted.
-- An 'Other' action carries its own text, which is passed through unchanged.
showMessage (Msg expecting pos action) =
  "\nParse error: " ++ show pos ++ "\n" ++ actionMessage ++ "\n"
  where
    -- Prefix shared by the two repair cases.
    expected = "expecting: " ++ show expecting
    actionMessage = case action of
      Insert s -> expected ++ "\nrepaired by inserting " ++ show s
      Delete s -> expected ++ "\nrepaired by deleting unexpected symbol " ++ show s
      Other m  -> m


-- Parse the BibTeX file named by the argument and print how many items were
-- read, e.g. parsebib "btxdoc.bib".
-- Parser messages are formatted with showMessage.
parsebib :: String -> IO ()
parsebib filename = do
  res <- parseFile showMessage pBibData filename
  putStrLn ("\nResult:" ++ show (length res) ++ " bib items were parsed")


-- Program entry point: parse the file named by the first command-line
-- argument, or print a one-line usage message when there are no arguments.
-- Any arguments after the first are ignored.
main :: IO ()
main = do
  args <- getArgs
  case args of
    []         -> putStr "BibtexParser <bibfile>\n"
    (file : _) -> parsebib file

-- =======================================================================================
-- ===== DATA TYPES ======================================================================
-- =======================================================================================
-- The parser type used by the parsers in this file (see the signatures of
-- pField and pLAYOUT).
-- NOTE(review): Char and Pos look like the symbol and position types of
-- AnaParser -- confirm against the UU.Parsing documentation.
type BibParser = AnaParser Input Pair Char Pos


-- The result of parsing a whole file: the list of its items, as produced by
-- pBibData.
type BibData  = [ BibEntry]

-- One item of a BibTeX file, i.e. something introduced by '@'.
data BibEntry
  = Entry String (String, [Field]) -- entry kind, then (key, field list)
  | Comment String                 -- raw body of a "comment" item
  | Preamble [ValItem]             -- value of a "preamble" item
  | StringDef Field                -- the single definition of a "string" item
  deriving Show


-- A field as parsed by pField: the name left of '=' paired with the value
-- items right of it.
type Field    = (String, [ValItem])

-- One component of a field value, as produced by pValItems.
data ValItem
  = StringVal String -- a string literal (double-quoted or braced)
  | IntVal Int       -- a name consisting only of digits
  | NameUse String   -- any other name
  deriving Show
-- =======================================================================================
-- ===== PARSERS =========================================================================
-- =======================================================================================
-- Parse a whole BibTeX file into the list of its items.
--
-- The text between items (any run of characters other than '@') is parsed
-- and discarded. The parser is phrased as a chain: the discarded text is the
-- operand and yields the empty list, while each item plays the role of the
-- operator and conses itself onto the result of whatever follows it.
pBibData :: BibParser BibData
pBibData = pChainr consEntry interEntryText
  where
    consEntry      = (\entry _ right -> entry : right) <$> pBibEntry
    interEntryText = [] <$ pList (allChars `pExcept` "@")


-- Parse one item introduced by '@'. Four forms are recognised:
--   * a regular entry: a kind name followed, in parentheses or braces, by a
--     key, a comma, and a comma-separated list of fields (an optional
--     trailing comma is accepted);
--   * "comment":  the body is kept as raw characters up to the first closing
--     delimiter ('}' for a braced body, ')' for a parenthesised one);
--   * "preamble": the body is a value as parsed by pValItems;
--   * "string":   the body is a single field as parsed by pField.
-- The three keywords go through pKey, which accepts every letter either as
-- written here or in upper case.
pBibEntry :: BibParser BibEntry
pBibEntry
  =   Entry     <$ pAt <*> pName <*> pOpenClose entryBody
  <|> Comment   <$ pAt <* pKey "comment"  <*> commentBody
  <|> Preamble  <$ pAt <* pKey "preamble" <*> pOpenClose pValItems
  <|> StringDef <$ pAt <* pKey "string"   <*> pOpenClose pField
  where
    -- Key, separating comma, fields, optional trailing comma.
    entryBody = pKeyName <* pSpec ','
                  <+> pListSep_ng pComma pField
                  <* (pComma `opt` ' ')
    -- Raw comment text; the delimiter pair decides which character ends it.
    commentBody =   pCurly (pList (allChars `pExcept` "}"))
                <|> pParen (pList (allChars `pExcept` ")"))


-- Parse a single field: a name, an '=' sign, and the value items, returned
-- as a (name, value items) pair.
pField :: BibParser Field
pField = (pName <* pSpec '=') <+> pValItems

-- Parse a field value: one or more items separated by '#'.
-- Each item is either a string literal or a name. A name made up entirely of
-- digits becomes an IntVal; any other name becomes a NameUse.
pValItems :: BibParser [ValItem]
pValItems = pList1Sep (pSpec '#') valItem
  where
    valItem = StringVal <$> pString <|> intOrName <$> pName
    -- pName is built with pList1, so 'read' is only applied to a non-empty
    -- string of digits here.
    -- NOTE(review): a digit string too large for Int is not rejected.
    intOrName s
      | all isDigit s = IntVal (read s)
      | otherwise     = NameUse s
-- =======================================================================================
-- ===== LEXICAL STUFF ===================================================================
-- =======================================================================================
-- Parse a run of layout characters: spaces, tabs, carriage returns and
-- newlines. Every token-level parser in this file ends with it so that
-- trailing layout is consumed together with the token.
-- NOTE(review): the EOr [] `setfirsts` wrapper appears to blank out the
-- "expecting" information of the whitespace parser so that layout is not
-- listed in error messages -- confirm against the UU.Parsing documentation.
pLAYOUT :: BibParser String
pLAYOUT = pList (EOr [] `setfirsts` pAnySym " \t\r\n")
-- Parse the given character and then any layout that follows it; the result
-- is the character itself.
pSpec :: Char -> BibParser Char
pSpec sym = pSym sym <* pLAYOUT


-- Run a parser between '(' and ')'; layout after either delimiter is skipped.
pParen :: BibParser a -> BibParser a
pParen = pPacked (pSpec '(') (pSpec ')')

-- Run a parser between '{' and '}'; layout after either delimiter is skipped.
pCurly :: BibParser a -> BibParser a
pCurly = pPacked (pSpec '{') (pSpec '}')

-- Run a parser inside either kind of delimiter pair, parentheses or braces.
pOpenClose :: BibParser a -> BibParser a
pOpenClose body = pParen body <|> pCurly body
-- A comma followed by optional layout.
-- NOTE(review): 4 is the cost argument handed to pCostSym, presumably the
-- cost of inserting a missing comma during error repair -- confirm against
-- the UU.Parsing documentation.
pComma :: BibParser Char
pComma = pCostSym 4 ',' ',' <* pLAYOUT

-- The '@' that introduces every item, followed by optional layout.
pAt :: BibParser Char
pAt = pSpec '@'

-- Character triple handed to pExcept throughout this file: the characters
-- with codes 1 and 127, plus a space.
-- NOTE(review): presumably (lowest, highest, default symbol) of the accepted
-- range -- confirm against the pExcept documentation.
allChars :: (Char, Char, Char)
allChars = ('\x01', '\x7f', ' ')

-- Parse a name: one or more ASCII letters, digits, or any of '-', '_', '/',
-- followed by optional layout.
pName :: BibParser String
pName = pList1 nameChar <* pLAYOUT
  where
    nameChar = 'a' <..> 'z' <|> 'A' <..> 'Z' <|> '0' <..> '9' <|> pAnySym "-_/"
-- Parse an entry key: one or more characters taken from the triple
-- ('!', DEL, ' ') (character codes 33 and 127, plus a space) with ',', '='
-- and '@' excluded, followed by optional layout.
pKeyName :: BibParser String
pKeyName = pList1 keyChar <* pLAYOUT
  where
    keyChar = ('!', '\DEL', ' ') `pExcept` ",=@"


-- Parse a reserved word, accepting each of its characters either exactly as
-- given or in its upper-case form, then skip layout after the last one.
-- The result is the word as it actually appeared in the input.
-- An empty reserved word is a programming error and aborts with 'error'.
pKey :: String -> BibParser String
pKey word = case word of
  []       -> error "Scanner: You cannot have empty reserved words!"
  [c]      -> lift <$> anyCase c <* pLAYOUT
  (c : cs) -> (:) <$> anyCase c <*> pKey cs
  where
    -- The character as written, or its upper-case counterpart.
    anyCase c = pSym c <|> pSym (toUpper c)

-- Parse a string literal and skip the layout after it. Two forms exist:
--   * "..." : may contain any character except '"', '{' and '}', plus
--             nested brace groups;
--   * {...} : the same, except that '"' is also allowed directly.
-- Inside a nested brace group '"' is always allowed. The outer delimiters
-- are dropped from the result; the braces of nested groups are kept.
pString :: BibParser String
pString = (quoted <|> braced) <* pLAYOUT
  where
    quoted = pSym '"' *> pConc pStringWordDQ <* pSym '"'
    braced = pSym '{' *> pConc pStringWord   <* pSym '}'

    -- A nested {...} group, returned with its braces.
    curlyStrings = stringcons <$> pSym '{' <*> pConc pStringWord <*> pSym '}'

    -- One piece of string content: a single character or a nested group.
    pStringWordDQ = lift <$> pStringCharDQ <|> curlyStrings
    pStringWord   = lift <$> pStringChar   <|> curlyStrings

    -- Plain characters allowed in each context.
    pStringCharDQ = allChars `pExcept` "\"{}"
    pStringChar   = pStringCharDQ <|> pSym '\"'

    -- Concatenate the results of a repeated parser.
    pConc = pFoldr ((++), [])
    stringcons c1 ss c2 = [c1] ++ ss ++ [c2]


-- Wrap a single value in a one-element list.
lift :: a -> [a]
lift x = x : []



On 2005 apr 07, at 4:18, Thomas Bevan wrote:

Does anyone know of any work being done on LaTeX with Haskell?

I am particularly interested in finding a Haskell replacement for BibTeX.

Thanks.

Tom
_______________________________________________
Haskell mailing list
[email protected]
http://www.haskell.org/mailman/listinfo/haskell

_______________________________________________ Haskell mailing list [email protected] http://www.haskell.org/mailman/listinfo/haskell

Reply via email to