Attached patch, because the svn repository is down. 1. Remove old qq code (it's all in the q code now) 2. Make <<>> work 3. Regular expressions are parsed as qq//, but with no backslash protection at all. (Should it be partial backslash protection for \qq and \<delim>?) 4. Hash subscripts using <> or <<>> are parsed with the same code as general quotations. This means interpolation, etc. is done just like q:w or qq:ww.
Note: For now :ww acts just like :w, i.e. no protection. This is because I don't know what the semantics should be. (I asked on p6l, but the jury is still in deliberations. Or maybe they declared a mistrial while I wasn't looking). -- -Roie v2sw6+7CPhw5ln5pr4/6$ck2ma8+9u7/8LSw2l6Fi2e2+8t4TNDSb8/4Aen4+7g5Za22p7/8 [ http://www.hackerkey.com ]
Index: src/Pugs/Lexer.hs =================================================================== --- src/Pugs/Lexer.hs (revision 2137) +++ src/Pugs/Lexer.hs (working copy) @@ -96,9 +96,14 @@ aheadSym '~' y = not (y `elem` "&|^<>~") aheadSym x y = y `elem` ";!" || x /= y -interpolatingStringLiteral endchar interpolator = do - list <- stringList - return $ Cxt "Str" (homogenConcat list) +interpolatingStringLiteral :: RuleParser x -- Closing delimiter + -> RuleParser Exp -- Interpolator + -> RuleParser Exp -- Entire string + -- (without delims) + +interpolatingStringLiteral endrule interpolator = do + list <- stringList + return $ Syn "cxt" [Val (VStr "Str"), homogenConcat list] where homogenConcat :: [Exp] -> Exp homogenConcat [] = Val (VStr "") @@ -107,10 +112,10 @@ homogenConcat (x:y:xs) = App "&infix:~" [x, homogenConcat (y:xs)] [] stringList = do - lookAhead (char endchar) + lookAhead endrule return [] <|> do - parse <- interpolator endchar + parse <- interpolator rest <- stringList return (parse:rest) <|> do Index: src/Pugs/Parser.hs =================================================================== --- src/Pugs/Parser.hs (revision 2137) +++ src/Pugs/Parser.hs (working copy) @@ -944,53 +944,36 @@ , angleBracketLiteral ] -rxInterpolator end = choice - [ qqInterpolatorVar end, rxInterpolatorChar, ruleVerbatimBlock ] - -qqInterpolator end = choice - [ qqInterpolatorVar end, qqInterpolatorChar, ruleVerbatimBlock ] - -qqInterpolatorVar end = try $ do - var <- ruleVarNameString - if (last var == end) then fail "" else return () - fs <- if head var == '$' - then many qqInterpolatorPostTerm - else many1 qqInterpolatorPostTerm - return $ foldr (.) id (reverse fs) $ makeVar var - -qqInterpolatorPostTerm = try $ do - option ' ' $ char '.' - choice - [ ruleInvocationParens - , ruleArraySubscript - , ruleHashSubscript - , ruleCodeSubscript - ] - -rxInterpolatorChar = do +-- Interpolating constructs +qInterpolatorChar = do char '\\' - nextchar <- anyChar -- escapeCode -- see Lexer.hs - return (Val $ VStr ['\\', nextchar]) - -qqInterpolatorChar = do - char '\\' nextchar <- escapeCode -- see Lexer.hs return (Val $ VStr [nextchar]) -qInterpolateDelimiter end = do +qInterpolateDelimiter protectedChar = do char '\\' - c <- oneOf (end:"\\") + c <- oneOf (protectedChar:"\\") return (Val $ VStr [c]) qInterpolateQuoteConstruct = try $ do string "\\q" flag <- many1 alphaNum char '[' - expr <- interpolatingStringLiteral (']') (qInterpolator $ getQFlags [flag]) + expr <- interpolatingStringLiteral (char ']') (qInterpolator $ getQFlags [flag] ']') char ']' return expr -qInterpolator flags end = choice [ +qInterpolatorPostTerm = try $ do + option ' ' $ char '.' + choice + [ ruleInvocationParens + , ruleArraySubscript + , ruleHashSubscript + , ruleCodeSubscript + ] + +qInterpolator :: QFlags -> RuleParser Exp +qInterpolator flags = choice [ closure, backslash, variable @@ -1000,39 +983,45 @@ then ruleVerbatimBlock else mzero backslash = case qfInterpolateBackslash flags of - 'a' -> try qqInterpolatorChar + 'a' -> try qInterpolatorChar <|> (try qInterpolateQuoteConstruct) - <|> (try $ qInterpolateDelimiter end) + <|> (try $ qInterpolateDelimiter $ qfProtectedChar flags) 's' -> try qInterpolateQuoteConstruct - <|> (try $ qInterpolateDelimiter end) + <|> (try $ qInterpolateDelimiter $ qfProtectedChar flags) 'n' -> mzero _ -> fail "" variable = try $ do var <- ruleVarNameString - if (last var == end) then fail "" else return () fs <- case head var of '$' -> if qfInterpolateScalar flags - then many qqInterpolatorPostTerm + then many qInterpolatorPostTerm else fail "" '@' -> if qfInterpolateArray flags - then many1 qqInterpolatorPostTerm + then many1 qInterpolatorPostTerm else fail "" '%' -> if qfInterpolateHash flags - then many1 qqInterpolatorPostTerm + then many1 qInterpolatorPostTerm else fail "" '&' -> if qfInterpolateFunction flags - then many1 qqInterpolatorPostTerm + then many1 qInterpolatorPostTerm else fail "" _ -> fail "" return $ foldr (.) id (reverse fs) $ makeVar var qLiteral = do -- This should include q:anything// as well as '' "" <> - (ch, flags) <- getQDelim - expr <- interpolatingStringLiteral (balancedDelim ch) (qInterpolator flags) - char (balancedDelim ch) + (qEnd, flags) <- getQDelim + qLiteral1 qEnd flags + +qLiteral1 :: RuleParser x -- Closing delimiter + -> QFlags + -> RuleParser Exp +qLiteral1 qEnd flags = do + expr <- interpolatingStringLiteral qEnd (qInterpolator flags) + qEnd case qfSplitWords flags of -- expr ~~ rx:perl5:g/(\S+)/ 'y' -> return $ doSplit expr + 'p' -> return $ doSplit expr 'n' -> return expr _ -> fail "" where @@ -1045,16 +1034,36 @@ ] ] +angleBracketLiteral :: RuleParser Exp +angleBracketLiteral = try $ + do + symbol "<<" + qLiteral1 (symbol ">>") (qqFlags { qfSplitWords = 'p', qfProtectedChar = '>' }) + <|> do + symbol "<" + qLiteral1 (char '>') (qFlags { qfSplitWords = 'y', qfProtectedChar = '>' }) + <|> do + symbol "\xab" + qLiteral1 (char '\xbb') (qFlags { qfSplitWords = 'y', qfProtectedChar = '\xbb' }) + +-- Quoting delimitor and flags data QFlags = QFlags { qfSplitWords :: !Char, -- No, Yes, Protect qfInterpolateScalar :: !Bool, qfInterpolateArray :: !Bool, qfInterpolateHash :: !Bool, qfInterpolateFunction :: !Bool, qfInterpolateClosure :: !Bool, - qfInterpolateBackslash :: !Char -- No, Single, All + qfInterpolateBackslash :: !Char, -- No, Single, All + qfProtectedChar :: !Char + {- qfProtectedChar is the character to be + protected by backslashes, if + qfInterpolateBackslash is Single or All. + -} } -getQFlags flagnames = foldr useflag qFlags $ reverse flagnames +getQFlags :: [String] -> Char -> QFlags +getQFlags flagnames protectedChar = + (foldr useflag qFlags $ reverse flagnames) { qfProtectedChar = protectedChar } where -- Additive flags useflag "w" qf = qf { qfSplitWords = 'y' } @@ -1086,6 +1095,9 @@ -- XXX What to do in case of unknown flag? Currently do nothing useflag _ qf = qf +openingDelim = anyChar +{- XXX can be later defined to exclude alphanumerics, maybe also exclude +closing delims from being openers (disallow q]a]) -} getQDelim = try $ do string "q" @@ -1098,32 +1110,37 @@ notFollowedBy alphaNum whiteSpace - delim <- anyChar - return (delim, getQFlags flags) + delim <- openingDelim + return (char $ balancedDelim delim, getQFlags flags $ balancedDelim delim) + <|> try (do + string "<<" + return ( + string ">>" >> return 'x', + qqFlags { qfSplitWords = 'p', qfProtectedChar = '>' })) <|> do - char <- oneOf "\"'<«" - case char of - '"' -> return ('"', qqFlags) - '\'' -> return ('\'', qFlags) - '<' -> return ('<', qFlags { qfSplitWords = 'y' }) - --'«' -> return ('«', qqFlags { qfSplitWords = 'p' }) - _ -> fail "" + delim <- oneOf "\"'<\xab" + case delim of + '"' -> return (char '"', qqFlags) + '\'' -> return (char '\'', qFlags) + '<' -> return (char '>', qFlags { qfSplitWords = 'y', qfProtectedChar = '>' }) + '\xab' -> return (char '\xbb', qqFlags { qfSplitWords = 'p', qfProtectedChar = '\xbb' }) + _ -> fail "" where oneflag = do string ":" many alphaNum -- Default flags -qFlags = QFlags 'n' False False False False False 's' -qqFlags = QFlags 'n' True True True True True 'a' -rawFlags = QFlags 'n' False False False False False 'n' +qFlags = QFlags 'n' False False False False False 's' '\'' +qqFlags = QFlags 'n' True True True True True 'a' '"' +rawFlags = QFlags 'n' False False False False False 'n' 'x' +-- Regexps +rxLiteral1 :: RuleParser x -- Closing delimiter + -> RuleParser Exp +rxLiteral1 rxEnd = qLiteral1 rxEnd $ + qqFlags { qfInterpolateBackslash = 'n'} -quotedDelim ch = choice - [ try $ do { string [ '\\', ch ]; return ch } - , try $ do { string "\\\\"; return '\\' } - ] - ruleAdverbHash = do pairs <- many pairAdverb return $ Syn "\\{}" [Syn "," pairs] @@ -1131,33 +1148,21 @@ substLiteral = try $ do symbol "s" adverbs <- ruleAdverbHash - ch <- anyChar + ch <- openingDelim let endch = balancedDelim ch - expr <- interpolatingStringLiteral endch rxInterpolator - char endch + expr <- rxLiteral1 (char endch) ch <- if ch == endch then return ch else do { whiteSpace ; anyChar } let endch = balancedDelim ch - subst <- interpolatingStringLiteral endch qqInterpolator - char endch + subst <- qLiteral1 (char endch) qqFlags { qfProtectedChar = endch } return $ Syn "subst" [expr, subst, adverbs] rxLiteral = try $ do symbol "rx" adverbs <- ruleAdverbHash ch <- anyChar - expr <- interpolatingStringLiteral (balancedDelim ch) rxInterpolator - char $ balancedDelim ch + expr <- rxLiteral1 (char $ balancedDelim ch) return $ Syn "rx" [expr, adverbs] -angleBracketLiteral = try $ do - symbol "<" - str <- many $ satisfy (/= '>') - char '>' - return $ case words str of - [] -> Val (VStr "") - [x] -> Val (VStr x) - xs -> Syn "," $ map (Val . VStr) xs - namedLiteral n v = do { symbol n; return $ Val v } dotdotdotLiteral = do