module Main where import Text.Parsec import Text.Parsec.String import qualified Text.Parsec.Prim -- for parse, parseTest import Text.Parsec.Char import Text.Parsec.Combinator -- -- This is a simple function designed to be invoked from the GHCi environment, so that we can easily test a parser. testParser parser string = Text.Parsec.Prim.parse parser "TestString" string -- {- Basic stuff -} -- Just parse a single 'a' character; that's it. parseA :: Parser () parseA = do char 'a' return () -- Parse a 'b' character instead. parseB :: Parser () parseB = do char 'b' return () -- Or, we can parameterise this function, and pass it an arbitrary character to parse! (This means that this function effectively the same as the 'char' parser function that it calls, only that we return () rather than an interesting value.) parseChar :: Char -> Parser () parseChar c = do char c return () -- {- Return values -} -- Here's our first example of a parser combinator: we have a function that takes in another parser as a parameter. This function will 'wrap' the given function by parsing an open and close parenthesis 'around' the given parser. inParentheses1 :: Parser a -> Parser () inParentheses1 parser = do char '(' parser char ')' return () -- This is similar to inParenthesis1, except that we capture the output from the given function and return that, rather than returning (). inParentheses2 :: Parser a -> Parser a inParentheses2 parser = do char '(' outputFromParser <- parser char ')' return outputFromParser -- Of course, we can simply let Haskell deduce the type signature for us... {- note: no type signature here -} inParentheses3 parser = do char '(' outputFromParser <- parser char ')' return (length outputFromParser) -- And this is an example of building of a new parser from other parsers: here, inParentheses4 calls inParentheses2 with its argument, grabs its output, and returns its length. It's the same as inParentheses3, but perhaps shows the idea of "combinators" more succinctly. inParentheses4 parser = do parserOutput <- inParentheses2 parser return (length parserOutput) -- -- At this point in the talk, we pull up the documentation for the Text.Parsec.Char module, to show the "primitive" character parsers. We demo the oneOf and satisfy functions from that module. {- show Text.Parsec.Char, demo oneOf, satisfy -} -- After demoing oneOf and satify, we then show the documentation for Text.Parsec.Combinator, and demo the many1 parser combinator function from that module. {- show Text.Parsec.Combinator, demo many1 -} -- -- Now we demonstrate something more substantial. We introduce a tiny language that looks like the information on a shopping receipt, and write a parser for it. -- Here's some examples of the receipt transaction strings: note that the "total" item must be the exact sum of all the previous items in the receipt. goodTransactionString1 = "book 12.00; plant 2.55; 14.55 total" goodTransactionString2 = "book 12.00; plant 2.55; refund 1.00; 13.55 total" -- And here's two examples of bad receipts. Contrast the incorrect total figure with the correct total figure above. badTransactionString1 = "book 12.00; plant 2.55; 14.56 total" badTransactionString2 = "book 12.00; plant 2.55; refund 1.00; 13.56 total" -- Here's the EBNF grammar for the receipt language: {- receipt ::= product* total product ::= "refund" _ price ";" _ | [a-zA-Z]+ _ price ";" _ total ::= price _ "total" price ::= digit+ "." digit digit -} -- And here are some examples that should correctly fail to parse: illegalTransactionString1 = "book 12.00; plant 2; 14.00 total" illegalTransactionString2 = "book 12.00 ; plant 2; 14.00 total" -- -- First, we define a data type for a particular transaction item: an item can either be the name of a product, or it can be a refund. data Transaction = Product String Float | Refund Float deriving (Show, Eq) -- And, we define a function to calculate the total amount for a list of transactions. transactionTotal [] = 0.00 transactionTotal (Product _ amount:x) = amount + transactionTotal x transactionTotal (Refund amount:x) = (-amount) + transactionTotal x -- Now we start translating from the EBNF grammar to Parsec. We start with the price node. Observe the similarity between the EBNF grammar and the actual code! {- price ::= digit+ "." digit digit -} price' :: Parser () price' = do many1 digit char '.' digit digit return () -- Of course, the main difference vs the grammar is that we need to return some semantic information. So, here's the real version of the price parser that actually returns the list of parsed products. I split it up into two phases so that hopefully the parsing code's a little more clear. (Keep in mind that this is still 100% Haskell code: there's no crazy pre-processing being done to turn this into Haskell code!) price = do a <- many1 digit b <- char '.' -- "need a dot, because this is meant to be money, dumbass" c <- digit d <- digit let s = a ++ [b] ++ [c] ++ [d] let f = read s :: Float return f -- Next, the total node, which is pretty simple. {- total ::= price "total" -} total' = do price string "total" total = do p <- price spaces string "total" return p -- And now, we look at the product node. Again, observe the similarity between the EBNF grammar and the Parsec code. {- product ::= "refund" _ price ";" _ | [a-zA-Z]+ _ price ";" _ -} product' = do string "refund" spaces price char ';' spaces return () <|> do many1 letter spaces price char ';' spaces return () -- Once again, we write a slightly different version of product that returns the semantic information we need from the receipt. product :: Parser Transaction product = do string "refund" spaces p <- price char ';' spaces return (Refund p) <|> do productName <- many1 letter spaces p <- price char ';' spaces return (Product productName p) -- And finally, the receipt node. That's it! {- receipt ::= product* total -} receipt' = do many Main.product total return () receipt = do products <- many Main.product claimedTotalPrice <- total let calculatedTotalPrice = transactionTotal products return products