regex-applicative-0.3.3.1/Text/0000755000000000000000000000000011604076204014377 5ustar0000000000000000regex-applicative-0.3.3.1/Text/Regex/0000755000000000000000000000000012637721012015452 5ustar0000000000000000regex-applicative-0.3.3.1/Text/Regex/Applicative/0000755000000000000000000000000013256162617017723 5ustar0000000000000000regex-applicative-0.3.3.1/benchmark/0000755000000000000000000000000013220225302015374 5ustar0000000000000000regex-applicative-0.3.3.1/tests/0000755000000000000000000000000013220217534014614 5ustar0000000000000000regex-applicative-0.3.3.1/Text/Regex/Applicative.hs0000644000000000000000000000165012637721012020251 0ustar0000000000000000-------------------------------------------------------------------- -- | -- Module : Text.Regex.Applicative -- Copyright : (c) Roman Cheplyaka -- License : MIT -- -- Maintainer: Roman Cheplyaka -- Stability : experimental -- -- To get started, see some examples on the wiki: -- -------------------------------------------------------------------- module Text.Regex.Applicative ( RE , sym , psym , msym , anySym , string , reFoldl , Greediness(..) , few , comap , withMatched , match , (=~) , replace , findFirstPrefix , findLongestPrefix , findShortestPrefix , findFirstInfix , findLongestInfix , findShortestInfix , module Control.Applicative ) where import Text.Regex.Applicative.Types import Text.Regex.Applicative.Interface import Control.Applicative regex-applicative-0.3.3.1/Text/Regex/Applicative/Object.hs0000644000000000000000000001050612532363622021462 0ustar0000000000000000-------------------------------------------------------------------- -- | -- Module : Text.Regex.Applicative.Object -- Copyright : (c) Roman Cheplyaka -- License : MIT -- -- Maintainer: Roman Cheplyaka -- Stability : experimental -- -- This is a low-level interface to the regex engine. -------------------------------------------------------------------- {-# LANGUAGE GADTs #-} module Text.Regex.Applicative.Object ( ReObject , compile , emptyObject , Thread , threads , failed , isResult , getResult , results , ThreadId , threadId , step , stepThread , fromThreads , addThread ) where import Text.Regex.Applicative.Types import qualified Text.Regex.Applicative.StateQueue as SQ import qualified Text.Regex.Applicative.Compile as Compile import Data.Maybe import Data.Foldable as F import Control.Monad.Trans.State import Control.Applicative hiding (empty) -- | The state of the engine is represented as a \"regex object\" of type -- @'ReObject' s r@, where @s@ is the type of symbols and @r@ is the -- result type (as in the 'RE' type). Think of 'ReObject' as a collection of -- 'Thread's ordered by priority. E.g. threads generated by the left part of -- '<|>' come before the threads generated by the right part. newtype ReObject s r = ReObject (SQ.StateQueue (Thread s r)) -- | List of all threads of an object. Each non-result thread has a unique id. threads :: ReObject s r -> [Thread s r] threads (ReObject sq) = F.toList sq -- | Create an object from a list of threads. It is recommended that all -- threads come from the same 'ReObject', unless you know what you're doing. -- However, it should be safe to filter out or rearrange threads. fromThreads :: [Thread s r] -> ReObject s r fromThreads ts = F.foldl' (flip addThread) emptyObject ts -- | Check whether a thread is a result thread isResult :: Thread s r -> Bool isResult Accept {} = True isResult _ = False -- | Return the result of a result thread, or 'Nothing' if it's not a result -- thread getResult :: Thread s r -> Maybe r getResult (Accept r) = Just r getResult _ = Nothing -- | Check if the object has no threads. In that case it never will -- produce any new threads as a result of 'step'. failed :: ReObject s r -> Bool failed obj = null $ threads obj -- | Empty object (with no threads) emptyObject :: ReObject s r emptyObject = ReObject $ SQ.empty -- | Extract the result values from all the result threads of an object results :: ReObject s r -> [r] results obj = mapMaybe getResult $ threads obj -- | Feed a symbol into a regex object step :: s -> ReObject s r -> ReObject s r step s (ReObject sq) = let accum q t = case t of Accept {} -> q Thread _ c -> F.foldl' (\q x -> addThread x q) q $ c s newQueue = F.foldl' accum emptyObject sq in newQueue -- | Feed a symbol into a non-result thread. It is an error to call 'stepThread' -- on a result thread. stepThread :: s -> Thread s r -> [Thread s r] stepThread s t = case t of Thread _ c -> c s Accept {} -> error "stepThread on a result" -- | Add a thread to an object. The new thread will have lower priority than the -- threads which are already in the object. -- -- If a (non-result) thread with the same id already exists in the object, the -- object is not changed. addThread :: Thread s r -> ReObject s r -> ReObject s r addThread t (ReObject q) = case t of Accept {} -> ReObject $ SQ.insert t q Thread { threadId_ = ThreadId i } -> ReObject $ SQ.insertUnique i t q -- | Compile a regular expression into a regular expression object compile :: RE s r -> ReObject s r compile = fromThreads . flip Compile.compile (\x -> [Accept x]) . renumber renumber :: RE s a -> RE s a renumber e = flip evalState (ThreadId 1) $ go e where go :: RE s a -> State ThreadId (RE s a) go e = case e of Eps -> return Eps Symbol _ p -> Symbol <$> fresh <*> pure p Alt a1 a2 -> Alt <$> go a1 <*> go a2 App a1 a2 -> App <$> go a1 <*> go a2 Fail -> return Fail Fmap f a -> Fmap f <$> go a Rep g f b a -> Rep g f b <$> go a Void a -> Void <$> go a fresh :: State ThreadId ThreadId fresh = do t@(ThreadId i) <- get put $! ThreadId (i+1) return t regex-applicative-0.3.3.1/Text/Regex/Applicative/Common.hs0000644000000000000000000000175712545746131021520 0ustar0000000000000000-- | -- Collection of commonly used regular expressions. module Text.Regex.Applicative.Common ( -- * Digits digit , hexDigit -- * Numbers , signed , decimal , hexadecimal ) where import Data.Char import Data.List (foldl') import Text.Regex.Applicative -- | Decimal digit, i.e. @\'0\'@..@\'9\'@ digit :: Num a => RE Char a digit = fromIntegral . digitToInt <$> psym isDigit -- | Hexadecimal digit -- i.e. @\'0\'@..@\'9\'@, @\'a\'@..@\'f\'@, @\'A\'@..@\'F\'@. hexDigit :: Num a => RE Char a hexDigit = fromIntegral . digitToInt <$> psym isHexDigit -- | Add optional sign signed :: Num a => RE Char a -> RE Char a signed p = sign <*> p where sign = id <$ sym '+' <|> negate <$ sym '-' <|> pure id -- | Parse decimal number without sign. decimal :: Num a => RE Char a decimal = foldl' (\d i -> d*10 + i) 0 <$> some digit -- | Parse decimal number without sign. hexadecimal :: Num a => RE Char a hexadecimal = foldl' (\d i -> d*16 + i) 0 <$> some hexDigit regex-applicative-0.3.3.1/Text/Regex/Applicative/Reference.hs0000644000000000000000000000417212545466544022167 0ustar0000000000000000-------------------------------------------------------------------- -- | -- Module : Text.Regex.Applicative.Reference -- Copyright : (c) Roman Cheplyaka -- License : MIT -- -- Maintainer: Roman Cheplyaka -- Stability : experimental -- -- Reference implementation (using backtracking). -- -- This is exposed for testing purposes only! -------------------------------------------------------------------- {-# LANGUAGE GADTs #-} module Text.Regex.Applicative.Reference (reference) where import Prelude hiding (getChar) import Text.Regex.Applicative.Types import Control.Applicative import Control.Monad -- A simple parsing monad newtype P s a = P { unP :: [s] -> [(a, [s])] } instance Monad (P s) where return x = P $ \s -> [(x, s)] (P a) >>= k = P $ \s -> a s >>= \(x,s) -> unP (k x) s instance Functor (P s) where fmap = liftM instance Applicative (P s) where (<*>) = ap pure = return instance Alternative (P s) where empty = P $ const [] P a1 <|> P a2 = P $ \s -> a1 s ++ a2 s getChar :: P s s getChar = P $ \s -> case s of [] -> [] c:cs -> [(c,cs)] re2monad :: RE s a -> P s a re2monad r = case r of Eps -> return $ error "eps" Symbol _ p -> do c <- getChar case p c of Just r -> return r Nothing -> empty Alt a1 a2 -> re2monad a1 <|> re2monad a2 App a1 a2 -> re2monad a1 <*> re2monad a2 Fmap f a -> fmap f $ re2monad a Rep g f b a -> rep b where am = re2monad a rep b = combine (do a <- am; rep $ f b a) (return b) combine a b = case g of Greedy -> a <|> b; NonGreedy -> b <|> a Void a -> re2monad a >> return () Fail -> empty runP :: P s a -> [s] -> Maybe a runP m s = case filter (null . snd) $ unP m s of (r, _) : _ -> Just r _ -> Nothing -- | 'reference' @r@ @s@ should give the same results as @s@ '=~' @r@. -- -- However, this is not very efficient implementation and is supposed to be -- used for testing only. reference :: RE s a -> [s] -> Maybe a reference r s = runP (re2monad r) s regex-applicative-0.3.3.1/Text/Regex/Applicative/StateQueue.hs0000644000000000000000000000313312532353440022334 0ustar0000000000000000-- | This internal module is exposed only for testing and benchmarking. You -- don't need to import it. module Text.Regex.Applicative.StateQueue ( StateQueue , empty , insert , insertUnique , getElements ) where import Prelude hiding (read, lookup, replicate) import qualified Data.IntSet as IntSet import Data.Foldable as F -- | 'StateQueue' is a data structure that can efficiently insert elements -- (preserving their order) -- and check whether an element with the given 'Int' key is already in the queue. data StateQueue a = StateQueue { elements :: [a] , ids :: !IntSet.IntSet } deriving (Eq,Show) instance Foldable StateQueue where foldr f a = F.foldr f a . getElements -- | Get the list of all elements getElements :: StateQueue a -> [a] getElements = reverse . elements {-# INLINE empty #-} -- | The empty state queue empty :: StateQueue a empty = StateQueue { elements = [] , ids = IntSet.empty } {-# INLINE insert #-} -- | Insert an element in the state queue, unless there is already an element with the same key insertUnique :: Int -- ^ key -> a -> StateQueue a -> StateQueue a insertUnique i v sq@StateQueue { ids = ids, elements = elements } = if i `IntSet.member` ids then sq else sq { elements = v : elements , ids = IntSet.insert i ids } -- | Insert an element in the state queue without a key. -- -- Since 'insert' doesn't take a key, it won't affect any 'insertUnique'. insert :: a -> StateQueue a -> StateQueue a insert v sq = sq { elements = v : elements sq } regex-applicative-0.3.3.1/Text/Regex/Applicative/Interface.hs0000644000000000000000000002706112637721012022155 0ustar0000000000000000{-# LANGUAGE TypeFamilies, GADTs, TupleSections #-} {-# OPTIONS_GHC -fno-warn-orphans #-} module Text.Regex.Applicative.Interface where import Control.Applicative hiding (empty) import qualified Control.Applicative import Control.Arrow import Data.Traversable import Data.String import Data.Maybe import Text.Regex.Applicative.Types import Text.Regex.Applicative.Object instance Functor (RE s) where fmap f x = Fmap f x f <$ x = pure f <* x instance Applicative (RE s) where pure x = const x <$> Eps a1 <*> a2 = App a1 a2 a *> b = pure (const id) <*> Void a <*> b a <* b = pure const <*> a <*> Void b instance Alternative (RE s) where a1 <|> a2 = Alt a1 a2 empty = Fail many a = reverse <$> Rep Greedy (flip (:)) [] a some a = (:) <$> a <*> many a instance (char ~ Char, string ~ String) => IsString (RE char string) where fromString = string -- | 'RE' is a profunctor. This is its contravariant map. -- -- (A dependency on the @profunctors@ package doesn't seem justified.) comap :: (s2 -> s1) -> RE s1 a -> RE s2 a comap f re = case re of Eps -> Eps Symbol t p -> Symbol t (p . f) Alt r1 r2 -> Alt (comap f r1) (comap f r2) App r1 r2 -> App (comap f r1) (comap f r2) Fmap g r -> Fmap g (comap f r) Fail -> Fail Rep gr fn a r -> Rep gr fn a (comap f r) Void r -> Void (comap f r) -- | Match and return a single symbol which satisfies the predicate psym :: (s -> Bool) -> RE s s psym p = msym (\s -> if p s then Just s else Nothing) -- | Like 'psym', but allows to return a computed value instead of the -- original symbol msym :: (s -> Maybe a) -> RE s a msym p = Symbol (error "Not numbered symbol") p -- | Match and return the given symbol sym :: Eq s => s -> RE s s sym s = psym (s ==) -- | Match and return any single symbol anySym :: RE s s anySym = msym Just -- | Match and return the given sequence of symbols. -- -- Note that there is an 'IsString' instance for regular expression, so -- if you enable the @OverloadedStrings@ language extension, you can write -- @string \"foo\"@ simply as @\"foo\"@. -- -- Example: -- -- >{-# LANGUAGE OverloadedStrings #-} -- >import Text.Regex.Applicative -- > -- >number = "one" *> pure 1 <|> "two" *> pure 2 -- > -- >main = print $ "two" =~ number string :: Eq a => [a] -> RE a [a] string = traverse sym -- | Match zero or more instances of the given expression, which are combined using -- the given folding function. -- -- 'Greediness' argument controls whether this regular expression should match -- as many as possible ('Greedy') or as few as possible ('NonGreedy') instances -- of the underlying expression. reFoldl :: Greediness -> (b -> a -> b) -> b -> RE s a -> RE s b reFoldl g f b a = Rep g f b a -- | Match zero or more instances of the given expression, but as -- few of them as possible (i.e. /non-greedily/). A greedy equivalent of 'few' -- is 'many'. -- -- Examples: -- -- >Text.Regex.Applicative> findFirstPrefix (few anySym <* "b") "ababab" -- >Just ("a","abab") -- >Text.Regex.Applicative> findFirstPrefix (many anySym <* "b") "ababab" -- >Just ("ababa","") few :: RE s a -> RE s [a] few a = reverse <$> Rep NonGreedy (flip (:)) [] a -- | Return matched symbols as part of the return value withMatched :: RE s a -> RE s (a, [s]) withMatched Eps = flip (,) [] <$> Eps withMatched (Symbol t p) = Symbol t (\s -> (,[s]) <$> p s) withMatched (Alt a b) = withMatched a <|> withMatched b withMatched (App a b) = (\(f, s) (x, t) -> (f x, s ++ t)) <$> withMatched a <*> withMatched b withMatched Fail = Fail withMatched (Fmap f x) = (f *** id) <$> withMatched x withMatched (Rep gr f a0 x) = Rep gr (\(a, s) (x, t) -> (f a x, s ++ t)) (a0, []) (withMatched x) -- N.B.: this ruins the Void optimization withMatched (Void x) = (const () *** id) <$> withMatched x -- | @s =~ a = match a s@ (=~) :: [s] -> RE s a -> Maybe a (=~) = flip match infix 2 =~ -- | Attempt to match a string of symbols against the regular expression. -- Note that the whole string (not just some part of it) should be matched. -- -- Examples: -- -- >Text.Regex.Applicative> match (sym 'a' <|> sym 'b') "a" -- >Just 'a' -- >Text.Regex.Applicative> match (sym 'a' <|> sym 'b') "ab" -- >Nothing -- match :: RE s a -> [s] -> Maybe a match re = let obj = compile re in \str -> listToMaybe $ results $ foldl (flip step) obj str -- | Find a string prefix which is matched by the regular expression. -- -- Of all matching prefixes, pick one using left bias (prefer the left part of -- '<|>' to the right part) and greediness. -- -- This is the match which a backtracking engine (such as Perl's one) would find -- first. -- -- If match is found, the rest of the input is also returned. -- -- Examples: -- -- >Text.Regex.Applicative> findFirstPrefix ("a" <|> "ab") "abc" -- >Just ("a","bc") -- >Text.Regex.Applicative> findFirstPrefix ("ab" <|> "a") "abc" -- >Just ("ab","c") -- >Text.Regex.Applicative> findFirstPrefix "bc" "abc" -- >Nothing findFirstPrefix :: RE s a -> [s] -> Maybe (a, [s]) findFirstPrefix re str = go (compile re) str Nothing where walk obj [] = (obj, Nothing) walk obj (t:ts) = case getResult t of Just r -> (obj, Just r) Nothing -> walk (addThread t obj) ts go obj str resOld = case walk emptyObject $ threads obj of (obj', resThis) -> let res = ((flip (,) str) <$> resThis) <|> resOld in case str of _ | failed obj' -> res [] -> res (s:ss) -> go (step s obj') ss res -- | Find the longest string prefix which is matched by the regular expression. -- -- Submatches are still determined using left bias and greediness, so this is -- different from POSIX semantics. -- -- If match is found, the rest of the input is also returned. -- -- Examples: -- -- >Text.Regex.Applicative Data.Char> let keyword = "if" -- >Text.Regex.Applicative Data.Char> let identifier = many $ psym isAlpha -- >Text.Regex.Applicative Data.Char> let lexeme = (Left <$> keyword) <|> (Right <$> identifier) -- >Text.Regex.Applicative Data.Char> findLongestPrefix lexeme "if foo" -- >Just (Left "if"," foo") -- >Text.Regex.Applicative Data.Char> findLongestPrefix lexeme "iffoo" -- >Just (Right "iffoo","") findLongestPrefix :: RE s a -> [s] -> Maybe (a, [s]) findLongestPrefix re str = go (compile re) str Nothing where go obj str resOld = let res = (fmap (flip (,) str) $ listToMaybe $ results obj) <|> resOld in case str of _ | failed obj -> res [] -> res (s:ss) -> go (step s obj) ss res -- | Find the shortest prefix (analogous to 'findLongestPrefix') findShortestPrefix :: RE s a -> [s] -> Maybe (a, [s]) findShortestPrefix re str = go (compile re) str where go obj str = case results obj of r : _ -> Just (r, str) _ | failed obj -> Nothing _ -> case str of [] -> Nothing s:ss -> go (step s obj) ss -- | Find the leftmost substring that is matched by the regular expression. -- Otherwise behaves like 'findFirstPrefix'. Returns the result together with -- the prefix and suffix of the string surrounding the match. findFirstInfix :: RE s a -> [s] -> Maybe ([s], a, [s]) findFirstInfix re str = fmap (\((first, res), last) -> (first, res, last)) $ findFirstPrefix ((,) <$> few anySym <*> re) str -- Auxiliary function for findExtremeInfix prefixCounter :: RE s (Int, [s]) prefixCounter = second reverse <$> reFoldl NonGreedy f (0, []) anySym where f (i, prefix) s = ((,) $! (i+1)) $ s:prefix data InfixMatchingState s a = GotResult { prefixLen :: !Int , prefixStr :: [s] , result :: a , postfixStr :: [s] } | NoResult -- a `preferOver` b chooses one of a and b, giving preference to a preferOver :: InfixMatchingState s a -> InfixMatchingState s a -> InfixMatchingState s a preferOver NoResult b = b preferOver b NoResult = b preferOver a b = case prefixLen a `compare` prefixLen b of GT -> b -- prefer b when it has smaller prefix _ -> a -- otherwise, prefer a mkInfixMatchingState :: [s] -- rest of input -> Thread s ((Int, [s]), a) -> InfixMatchingState s a mkInfixMatchingState rest thread = case getResult thread of Just ((pLen, pStr), res) -> GotResult { prefixLen = pLen , prefixStr = pStr , result = res , postfixStr = rest } Nothing -> NoResult gotResult :: InfixMatchingState s a -> Bool gotResult GotResult {} = True gotResult _ = False -- Algorithm for finding leftmost longest infix match: -- -- 1. Add a thread /.*?/ to the begginning of the regexp -- 2. As soon as we get first accept, we delete that thread -- 3. When we get more than one accept, we choose one by the following criteria: -- 3.1. Compare by the length of prefix (since we are looking for the leftmost -- match) -- 3.2. If they are produced on the same step, choose the first one (left-biased -- choice) -- 3.3. If they are produced on the different steps, choose the later one (since -- they have the same prefixes, later means longer) findExtremalInfix :: -- function to combine a later result (first arg) to an earlier one (second -- arg) (InfixMatchingState s a -> InfixMatchingState s a -> InfixMatchingState s a) -> RE s a -> [s] -> Maybe ([s], a, [s]) findExtremalInfix newOrOld re str = case go (compile $ (,) <$> prefixCounter <*> re) str NoResult of NoResult -> Nothing r@GotResult{} -> Just (prefixStr r, result r, postfixStr r) where {- go :: ReObject s ((Int, [s]), a) -> [s] -> InfixMatchingState s a -> InfixMatchingState s a -} go obj str resOld = let resThis = foldl (\acc t -> acc `preferOver` mkInfixMatchingState str t) NoResult $ threads obj res = resThis `newOrOld` resOld obj' = -- If we just found the first result, kill the "prefixCounter" thread. -- We rely on the fact that it is the last thread of the object. if gotResult resThis && not (gotResult resOld) then fromThreads $ init $ threads obj else obj in case str of [] -> res _ | failed obj -> res (s:ss) -> go (step s obj') ss res -- | Find the leftmost substring that is matched by the regular expression. -- Otherwise behaves like 'findLongestPrefix'. Returns the result together with -- the prefix and suffix of the string surrounding the match. findLongestInfix :: RE s a -> [s] -> Maybe ([s], a, [s]) findLongestInfix = findExtremalInfix preferOver -- | Find the leftmost substring that is matched by the regular expression. -- Otherwise behaves like 'findShortestPrefix'. Returns the result together with -- the prefix and suffix of the string surrounding the match. findShortestInfix :: RE s a -> [s] -> Maybe ([s], a, [s]) findShortestInfix = findExtremalInfix $ flip preferOver -- | Replace matches of the regular expression with its value. -- -- >Text.Regex.Applicative > replace ("!" <$ sym 'f' <* some (sym 'o')) "quuxfoofooooofoobarfobar" -- >"quux!!!bar!bar" replace :: RE s [s] -> [s] -> [s] replace r = ($ []) . go where go ys = case findLongestInfix r ys of Nothing -> (ys ++) Just (before, m, rest) -> (before ++) . (m ++) . go rest regex-applicative-0.3.3.1/Text/Regex/Applicative/Types.hs0000644000000000000000000000523412545465714021373 0ustar0000000000000000{-# LANGUAGE GADTs #-} {-# OPTIONS_GHC -fno-do-lambda-eta-expansion -fno-warn-unused-imports #-} module Text.Regex.Applicative.Types where import Control.Applicative -- The above import is needed for haddock to properly generate links to -- Applicative methods. But it's not actually used in the code, hence -- -fno-warn-unused-imports. newtype ThreadId = ThreadId Int -- | A thread either is a result or corresponds to a symbol in the regular -- expression, which is expected by that thread. data Thread s r = Thread { threadId_ :: ThreadId , _threadCont :: s -> [Thread s r] } | Accept r -- | Returns thread identifier. This will be 'Just' for ordinary threads and -- 'Nothing' for results. threadId :: Thread s r -> Maybe ThreadId threadId Thread { threadId_ = i } = Just i threadId _ = Nothing data Greediness = Greedy | NonGreedy deriving (Show, Read, Eq, Ord, Enum) -- | Type of regular expressions that recognize symbols of type @s@ and -- produce a result of type @a@. -- -- Regular expressions can be built using 'Functor', 'Applicative' and -- 'Alternative' instances in the following natural way: -- -- * @f@ '<$>' @ra@ matches iff @ra@ matches, and its return value is the result -- of applying @f@ to the return value of @ra@. -- -- * 'pure' @x@ matches the empty string (i.e. it does not consume any symbols), -- and its return value is @x@ -- -- * @rf@ '<*>' @ra@ matches a string iff it is a concatenation of two -- strings: one matched by @rf@ and the other matched by @ra@. The return value -- is @f a@, where @f@ and @a@ are the return values of @rf@ and @ra@ -- respectively. -- -- * @ra@ '<|>' @rb@ matches a string which is accepted by either @ra@ or @rb@. -- It is left-biased, so if both can match, the result of @ra@ is used. -- -- * 'empty' is a regular expression which does not match any string. -- -- * 'many' @ra@ matches concatenation of zero or more strings matched by @ra@ -- and returns the list of @ra@'s return values on those strings. -- -- * 'some' @ra@ matches concatenation of one or more strings matched by @ra@ -- and returns the list of @ra@'s return values on those strings. data RE s a where Eps :: RE s () Symbol :: ThreadId -> (s -> Maybe a) -> RE s a Alt :: RE s a -> RE s a -> RE s a App :: RE s (a -> b) -> RE s a -> RE s b Fmap :: (a -> b) -> RE s a -> RE s b Fail :: RE s a Rep :: Greediness -- repetition may be greedy or not -> (b -> a -> b) -- folding function (like in foldl) -> b -- the value for zero matches, and also the initial value -- for the folding function -> RE s a -> RE s b Void :: RE s a -> RE s () regex-applicative-0.3.3.1/Text/Regex/Applicative/Compile.hs0000644000000000000000000001120012545466002021634 0ustar0000000000000000{-# LANGUAGE GADTs #-} {-# OPTIONS_GHC -fno-do-lambda-eta-expansion #-} module Text.Regex.Applicative.Compile (compile) where import Control.Monad.Trans.State import Text.Regex.Applicative.Types import Control.Applicative import Data.Maybe import qualified Data.IntMap as IntMap compile :: RE s a -> (a -> [Thread s r]) -> [Thread s r] compile e k = compile2 e (SingleCont k) data Cont a = SingleCont !a | EmptyNonEmpty !a !a instance Functor Cont where fmap f k = case k of SingleCont a -> SingleCont (f a) EmptyNonEmpty a b -> EmptyNonEmpty (f a) (f b) emptyCont :: Cont a -> a emptyCont k = case k of SingleCont a -> a EmptyNonEmpty a _ -> a nonEmptyCont :: Cont a -> a nonEmptyCont k = case k of SingleCont a -> a EmptyNonEmpty _ a -> a -- The whole point of this module is this function, compile2, which needs to be -- compiled with -fno-do-lambda-eta-expansion for efficiency. -- -- Since this option would make other code perform worse, we place this -- function in a separate module and make sure it's not inlined. -- -- The point of "-fno-do-lambda-eta-expansion" is to make sure the tree is -- "compiled" only once. -- -- compile2 function takes two continuations: one when the match is empty and -- one when the match is non-empty. See the "Rep" case for the reason. compile2 :: RE s a -> Cont (a -> [Thread s r]) -> [Thread s r] compile2 e = case e of Eps -> \k -> emptyCont k () Symbol i p -> \k -> [t $ nonEmptyCont k] where -- t :: (a -> [Thread s r]) -> Thread s r t k = Thread i $ \s -> case p s of Just r -> k r Nothing -> [] App n1 n2 -> let a1 = compile2 n1 a2 = compile2 n2 in \k -> case k of SingleCont k -> a1 $ SingleCont $ \a1_value -> a2 $ SingleCont $ k . a1_value EmptyNonEmpty ke kn -> a1 $ EmptyNonEmpty -- empty (\a1_value -> a2 $ EmptyNonEmpty (ke . a1_value) (kn . a1_value)) -- non-empty (\a1_value -> a2 $ EmptyNonEmpty (kn . a1_value) (kn . a1_value)) Alt n1 n2 -> let a1 = compile2 n1 a2 = compile2 n2 in \k -> a1 k ++ a2 k Fail -> const [] Fmap f n -> let a = compile2 n in \k -> a $ fmap (. f) k -- This is actually the point where we use the difference between -- continuations. For the inner RE the empty continuation is a -- "failing" one in order to avoid non-termination. Rep g f b n -> let a = compile2 n threads b k = combine g (a $ EmptyNonEmpty (\_ -> []) (\v -> let b' = f b v in threads b' (SingleCont $ nonEmptyCont k))) (emptyCont k b) in threads b Void n -> let a = compile2_ n in \k -> a $ fmap ($ ()) k data FSMState = SAccept | STransition ThreadId type FSMMap s = IntMap.IntMap (s -> Bool, [FSMState]) mkNFA :: RE s a -> ([FSMState], (FSMMap s)) mkNFA e = flip runState IntMap.empty $ go e [SAccept] where go :: RE s a -> [FSMState] -> State (FSMMap s) [FSMState] go e k = case e of Eps -> return k Symbol i@(ThreadId n) p -> do modify $ IntMap.insert n $ (isJust . p, k) return [STransition i] App n1 n2 -> go n1 =<< go n2 k Alt n1 n2 -> (++) <$> go n1 k <*> go n2 k Fail -> return [] Fmap _ n -> go n k Rep g _ _ n -> let entries = findEntries n cont = combine g entries k in -- return value of 'go' is ignored -- it should be a subset of -- 'cont' go n cont >> return cont Void n -> go n k findEntries :: RE s a -> [FSMState] findEntries e = -- A simple (although a bit inefficient) way to find all entry points is -- just to use 'go' evalState (go e []) IntMap.empty compile2_ :: RE s a -> Cont [Thread s r] -> [Thread s r] compile2_ e = let (entries, fsmap) = mkNFA e mkThread _ k1 (STransition i@(ThreadId n)) = let (p, cont) = fromMaybe (error "Unknown id") $ IntMap.lookup n fsmap in [Thread i $ \s -> if p s then concatMap (mkThread k1 k1) cont else []] mkThread k0 _ SAccept = k0 in \k -> concatMap (mkThread (emptyCont k) (nonEmptyCont k)) entries combine g continue stop = case g of Greedy -> continue ++ stop NonGreedy -> stop ++ continue regex-applicative-0.3.3.1/tests/test.hs0000644000000000000000000001442513220217534016135 0ustar0000000000000000{-# LANGUAGE OverloadedStrings, FlexibleInstances, MultiParamTypeClasses, FlexibleContexts #-} import Text.Regex.Applicative import Text.Regex.Applicative.Reference import Control.Applicative import Control.Monad import Data.Traversable import Data.Maybe import Text.Printf import Test.SmallCheck import Test.SmallCheck.Series import Test.Tasty import Test.Tasty.SmallCheck import Test.Tasty.HUnit import StateQueue -- Small alphabets as SmallCheck's series newtype A = A { a :: Char } deriving Show instance Monad m => Serial m A where series = cons0 $ A 'a' newtype AB = AB { ab :: Char } deriving Show instance Monad m => Serial m AB where series = cons0 (AB 'a') \/ cons0 (AB 'b') newtype ABC = ABC { abc :: Char } deriving Show instance Monad m => Serial m ABC where series = cons0 (ABC 'a') \/ cons0 (ABC 'b') \/ cons0 (ABC 'c') re1 = let one = pure 1 <* sym 'a' two = pure 2 <* sym 'a' <* sym 'a' in (,) <$> (one <|> two) <*> (two <|> one) re2 = sequenceA $ [ pure 1 <* sym 'a' <* sym 'a' <|> pure 2 <* sym 'a' , pure 3 <* sym 'b' , pure 4 <* sym 'b' <|> pure 5 <* sym 'a' ] re3 = sequenceA $ [ pure 0 <|> pure 1 , pure 1 <* sym 'a' <* sym 'a' <|> pure 2 <* sym 'a' , pure 3 <* sym 'b' <|> pure 6 , fmap (+1) $ pure 4 <* sym 'b' <|> pure 7 <|> pure 5 <* sym 'a' ] re4 = sym 'a' *> many (sym 'b') <* sym 'a' re5 = (sym 'a' <|> sym 'a' *> sym 'a') *> many (sym 'a') re6 = many (pure 3 <* sym 'a' <* sym 'a' <* sym 'a' <|> pure 1 <* sym 'a') -- Regular expression from the weighted regexp paper. re7 = let many_A_or_B = many (sym 'a' <|> sym 'b') in (,) <$> many ((,,,) <$> many_A_or_B <*> sym 'c' <*> many_A_or_B <*> sym 'c') <*> many_A_or_B re8 = (,) <$> many (sym 'a' <|> sym 'b') <*> many (sym 'b' <|> sym 'c') -- NB: we don't test these against the reference impl, 'cause it will loop! re9 = many (sym 'a' <|> empty) <* sym 'b' re10 = few (sym 'a' <|> empty) <* sym 'b' prop re f s = let fs = map f s in reference re fs == (fs =~ re) prop_withMatched = let re = withMatched $ many (string "a" <|> string "ba") in \str -> case map ab str =~ re of Nothing -> True Just (x, y) -> concat x == y -- Because we have 2 slightly different algorithms for recognition and parsing, -- we test that they agree testRecognitionAgainstParsing re f s = let fs = map f s in isJust (fs =~ re) == isJust (fs =~ (re *> pure ())) tests = testGroup "Tests" [ testGroup "Engine tests" [ t "re1" 10 $ prop re1 a , t "re2" 10 $ prop re2 ab , t "re3" 10 $ prop re3 ab , t "re4" 10 $ prop re4 ab , t "re5" 10 $ prop re5 a , t "re6" 10 $ prop re6 a , t "re7" 7 $ prop re7 abc , t "re8" 7 $ prop re8 abc ] , testGroup "Recognition vs parsing" [ t "re1" 10 $ testRecognitionAgainstParsing re1 a , t "re2" 10 $ testRecognitionAgainstParsing re2 ab , t "re3" 10 $ testRecognitionAgainstParsing re3 ab , t "re4" 10 $ testRecognitionAgainstParsing re4 ab , t "re5" 10 $ testRecognitionAgainstParsing re5 a , t "re6" 10 $ testRecognitionAgainstParsing re6 a , t "re7" 7 $ testRecognitionAgainstParsing re7 abc , t "re8" 7 $ testRecognitionAgainstParsing re8 abc , t "re8" 10 $ testRecognitionAgainstParsing re9 ab , t "re8" 10 $ testRecognitionAgainstParsing re10 ab ] , testProperty "withMatched" prop_withMatched , testGroup "Tests for matching functions" [ testGroup "findFirstPrefix" [ u "t1" (findFirstPrefix ("a" <|> "ab") "abc") (Just ("a","bc")) , u "t2" (findFirstPrefix ("ab" <|> "a") "abc") (Just ("ab","c")) , u "t3" (findFirstPrefix "bc" "abc") Nothing ] , testGroup "findFirstInfix" [ u "t1" (findFirstInfix ("a" <|> "ab") "tabc") (Just ("t", "a","bc")) , u "t2" (findFirstInfix ("ab" <|> "a") "tabc") (Just ("t", "ab","c")) ] , testGroup "findLongestPrefix" [ u "t1" (findLongestPrefix ("a" <|> "ab") "abc") (Just ("ab","c")) , u "t2" (findLongestPrefix ("ab" <|> "a") "abc") (Just ("ab","c")) , u "t3" (findLongestPrefix "bc" "abc") Nothing ] , testGroup "findLongestInfix" [ u "t1" (findLongestInfix ("a" <|> "ab") "tabc") (Just ("t", "ab","c")) , u "t2" (findLongestInfix ("ab" <|> "a") "tabc") (Just ("t", "ab","c")) , u "t3" (findLongestInfix "bc" "tabc") (Just ("ta", "bc","")) ] , testGroup "findShortestPrefix" [ u "t1" (findShortestPrefix ("a" <|> "ab") "abc") (Just ("a","bc")) , u "t2" (findShortestPrefix ("ab" <|> "a") "abc") (Just ("a","bc")) , u "t3" (findShortestPrefix "bc" "abc") Nothing ] , testGroup "findShortestInfix" [ u "t1" (findShortestInfix ("a" <|> "ab") "tabc") (Just ("t", "a","bc")) , u "t2" (findShortestInfix ("ab" <|> "a") "tabc") (Just ("t", "a","bc")) , u "t3" (findShortestInfix "bc" "tabc") (Just ("ta", "bc","")) ] , testGroup "replace" [ u "t1" (replace ("x" <$ "a" <|> "y" <$ "ab") "tabc") "tyc" , u "t2" (replace ("y" <$ "ab" <|> "x" <$ "a") "tabc") "tyc" , u "t3" (replace ("x" <$ "bc") "tabc") "tax" , u "t4" (replace ("y" <$ "a" <|> "x" <$ "ab") "tacabc") "tycxc" ] ] , stateQueueTests ] where t name n = localOption (SmallCheckDepth n) . testProperty name u name real ideal = testCase name (assertEqual "" real ideal) main = defaultMain tests regex-applicative-0.3.3.1/tests/StateQueue.hs0000644000000000000000000000316012463770072017246 0ustar0000000000000000{-# LANGUAGE FlexibleInstances, MultiParamTypeClasses, ScopedTypeVariables #-} module StateQueue where import Test.Tasty import Test.Tasty.SmallCheck import Test.SmallCheck.Series import Control.Applicative import Text.Regex.Applicative.StateQueue as SQ fromElems :: [(a, Maybe Int)] -> StateQueue a fromElems = foldl f SQ.empty where f sq (x, mbKey) = maybe insert insertUnique mbKey x sq size :: StateQueue a -> Int size = length . getElements instance (Monad m, Serial m a) => Serial m (StateQueue a) where series = fromElems <$> series stateQueueTests = testGroup "StateQueue" [ testProperty "Insertion increments the # of elements" $ \sq (i :: Int) -> size (insert i sq) == size sq + 1 , testProperty "insertUnique increments the # of elements by 0 or 1" $ \sq (i :: Int) -> let d = size (insertUnique i i sq) - size sq in d == 0 || d == 1 , testProperty "insertUnique is idempotent" $ \sq (i :: Int) -> let f = insertUnique i i in f sq == (f . f) sq , testProperty "insert doesn't affect insertUnique" $ \(i :: Int) -> exists $ \sq -> let sq' = insert i sq in insertUnique i i sq' /= sq' , testProperty "insertUnique only cares about keys, not values" $ \sq i j -> let sq' = insertUnique i i sq in insertUnique i j sq' == sq' , testProperty "insert puts in the back" $ \sq (i :: Int) -> let sq' = insert i sq in last (getElements sq') == i , testProperty "insertUnique puts in the back" $ \sq i -> let sq' = insertUnique i i sq in sq' /= sq ==> last (getElements sq') == i ] regex-applicative-0.3.3.1/benchmark/benchmark.hs0000644000000000000000000000043713220225302017666 0ustar0000000000000000import Data.List import Data.Traversable import Data.Maybe import Criterion.Main import Text.Regex.Applicative regex = sequenceA (replicate 500 $ sym 'a' <|> pure 'b') <* sequenceA (replicate 500 $ sym 'a') main = defaultMain [bench "aaaaa" $ whnf (match regex) $ replicate 800 'a'] regex-applicative-0.3.3.1/LICENSE0000644000000000000000000000204311604104433014453 0ustar0000000000000000Copyright (c) 2011 Roman Cheplyaka Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. regex-applicative-0.3.3.1/Setup.hs0000644000000000000000000000005611604104217015104 0ustar0000000000000000import Distribution.Simple main = defaultMain regex-applicative-0.3.3.1/regex-applicative.cabal0000644000000000000000000000427113526335604020063 0ustar0000000000000000Name: regex-applicative Version: 0.3.3.1 Synopsis: Regex-based parsing with applicative interface Description: regex-applicative is a Haskell library for parsing using regular expressions. Parsers can be built using Applicative interface. Homepage: https://github.com/feuerbach/regex-applicative License: MIT License-file: LICENSE Author: Roman Cheplyaka Maintainer: Roman Cheplyaka Category: Text Build-type: Simple Extra-source-files: README.md CREDITS.md CHANGES.md Cabal-version: >=1.10 Source-repository head type: git location: git://github.com/feuerbach/regex-applicative.git Library Default-language: Haskell2010 Build-depends: base < 5, containers, transformers Exposed-modules: Text.Regex.Applicative Text.Regex.Applicative.Object Text.Regex.Applicative.Common Text.Regex.Applicative.Reference Text.Regex.Applicative.StateQueue Other-modules: Text.Regex.Applicative.Interface Text.Regex.Applicative.Types Text.Regex.Applicative.Compile GHC-Options: -Wall -fno-warn-name-shadowing -fno-warn-missing-signatures -fno-warn-orphans Test-Suite test-regex-applicative type: exitcode-stdio-1.0 hs-source-dirs: tests main-is: test.hs other-modules: StateQueue GHC-Options: -threaded Default-language: Haskell2010 Build-depends: base < 5, containers, transformers, smallcheck >= 1.0, tasty, tasty-smallcheck, tasty-hunit, regex-applicative Benchmark bench-regex-applicative type: exitcode-stdio-1.0 hs-source-dirs: benchmark main-is: benchmark.hs build-depends: base <5 , criterion , regex-applicative default-language: Haskell2010 regex-applicative-0.3.3.1/README.md0000644000000000000000000000251212200265406014727 0ustar0000000000000000regex-applicative ================= *regex-applicative* is aimed to be an efficient and easy to use parsing combinator library for Haskell based on regular expressions. Perl programmers often use regular expressions for parsing, even if it is not an appropriate tool for the job, because Perl has so good support for regexps. The opposite seems to be valid about Haskell programmers -- they use parsing combinators (which recognize context-free or even context-sensitive grammars), even when the language is actually regular! Hopefully, this library will improve the situation. Installation ------------ Install this library using `cabal-install` tool: cabal update cabal install regex-applicative Documentation ------------- The [API reference][haddock] is available from Hackage. To get started, see some [examples][examples] on the wiki. Other resources --------------- * [This package on Hackage][hackage] * [Issue tracker][issues] * [Repository][github] [examples]: https://github.com/feuerbach/regex-applicative/wiki/Examples [haddock]: http://hackage.haskell.org/packages/archive/regex-applicative/latest/doc/html/Text-Regex-Applicative.html [hackage]: http://hackage.haskell.org/package/regex-applicative [issues]: https://github.com/feuerbach/regex-applicative/issues [github]: https://github.com/feuerbach/regex-applicative regex-applicative-0.3.3.1/CREDITS.md0000644000000000000000000000053511652346034015101 0ustar0000000000000000The current implementation is based on ideas [publicized][cox] by Russ Cox. The original implementation was inspired and heavily based on the ideas from ["A Play on Regular Expressions"][play] by Sebastian Fischer, Frank Huch and Thomas Wilke. [cox]: http://swtch.com/~rsc/regexp/ [play]: http://sebfisch.github.com/haskell-regexp/regexp-play.pdf regex-applicative-0.3.3.1/CHANGES.md0000644000000000000000000000260113526335706015056 0ustar0000000000000000Changes ======= 0.3.3.1 ------- Make a release to refresh the haddocks on hackage (see ). 0.3.3 ----- Add `replace` 0.3.2.1 ------- * Use strict left fold in decimal/hexadecimal * Include a missing test module in the sdist tarball 0.3.2 ----- Add `msym` 0.3.1 ----- Add `comap` 0.3.0.3 ------- * Fix the test suite * Fix build with GHC 7.9 0.3.0.2 ------- Fix the test suite 0.3.0.1 ------- Port the test suite to tasty 0.3 --- * Add a new module, `Text.Regex.Applicative.Common`, which contains some commonly used regexps (by Aleksey Khudyakov) * Improve the test suite 0.2.1 ----- * Add the `withMatched` function * Make matching functions a bit more lax * Fix a bug in the `empty` method 0.2 --- * Infix matching functions * Improved documentation * Improved performance * Improved portability 0.1.5 ----- * Expose Object interface * Allow matching prefixes rather than the whole string * Add non-greedy repetitions 0.1.4 ----- * Completely rewrite the engine. Now it's faster and runs in constant space. * Add 'string' function and 'IsString' instance. 0.1.3 ----- * Fix a .cabal-file issue introduced in 0.1.2 * Change the fixity of =~ 0.1.2 ----- * Relax the constraint on the containers version 0.1.1 --- * Fix a bug in 'reFoldl' and 'many' * "Lazy" infinite regexes are no longer supported 0.1 --- * Initial release