diff --git a/lib/Scanner.hs b/lib/Scanner.hs
index 7cfd18f..902eaf8 100644
--- a/lib/Scanner.hs
+++ b/lib/Scanner.hs
@@ -1,3 +1,4 @@
+{-# LANGUAGE BangPatterns #-}
 
 -- | Fast not-backtracking incremental scanner for bytestrings
 --
@@ -14,6 +15,7 @@ module Scanner
 , scan
 , scanOnly
 , scanLazy
+, scanLazyWithLocation
 , scanWith
 , anyWord8
 , anyChar8
@@ -58,16 +60,26 @@ scanOnly s bs = go (scan s bs)
 
 -- | Scan lazy bytestring by resupplying scanner with chunks
 scanLazy :: Scanner a -> Lazy.ByteString -> Either String a
-scanLazy s lbs = go (scan s) (Lazy.ByteString.toChunks lbs)
+scanLazy s lbs = case scanLazyWithLocation s lbs of
+  Right a -> Right a
+  Left (_, err) -> Left err
+
+-- | Scan lazy bytestring by resupplying scanner with chunks
+--
+-- Unlike 'scanLazy', a failure also carries an offset: bytes supplied minus the failure's leftover
+scanLazyWithLocation :: Scanner a -> Lazy.ByteString -> Either (Int, String) a
+scanLazyWithLocation s lbs = go 0 (scan s) (Lazy.ByteString.toChunks lbs)
   where
-  go more chunks =
+  go !pos more chunks =
     let (chunk, chunks') = case chunks of
           [] -> (ByteString.empty, [])
           (c:cs) -> (c, cs)
     in case more chunk of
       Done _ r -> Right r
-      Fail _ err -> Left err
-      More more' -> go more' chunks'
+      Fail rest err ->
+        let pos' = pos + ByteString.length chunk - ByteString.length rest
+        in Left (pos', err)
+      More more' -> go (pos + ByteString.length chunk) more' chunks'
 
 -- | Scan with the provided resupply action
 scanWith :: Monad m => m ByteString -> Scanner a -> ByteString -> m (Result a)
diff --git a/spec/spec.hs b/spec/spec.hs
index 1786429..e0f2731 100644
--- a/spec/spec.hs
+++ b/spec/spec.hs
@@ -21,6 +21,7 @@ main = hspec $ do
   takeWhileSpec
   lookAheadSpec
   scanWithSpec
+  scanLazyWithLocationSpec
 
 anyWord8Spec :: Spec
 anyWord8Spec = describe "anyWord8" $ do
@@ -144,3 +145,14 @@ scanWithSpec = describe "scanWith" $ do
     let Just (Scanner.Done _ r) = scanWith (Just "b") p bs
 
     r `shouldBe` 'b'
+
+scanLazyWithLocationSpec :: Spec
+scanLazyWithLocationSpec = describe "scanLazyWithLocation" $ do
+  context "when fails" $ do
+    it "returns the error location" $ do
+      let bs = Lazy.ByteString.fromChunks ["a", "b", "c"]
+          p = do
+            char8 'a'
+            char8 'b'
+            char8 'd'
+      scanLazyWithLocation p bs `shouldBe` Left (3, "unexpected word")