diff --git a/skylighting-core/src/Skylighting/Parser.hs b/skylighting-core/src/Skylighting/Parser.hs
index c77f9724..e645d422 100644
--- a/skylighting-core/src/Skylighting/Parser.hs
+++ b/skylighting-core/src/Skylighting/Parser.hs
@@ -227,6 +227,8 @@ getParser casesensitive syntaxname itemdatas lists kwattr cattr el = do
   let str' = getAttrValue "String" el
   let insensitive = vBool (not casesensitive) $ getAttrValue "insensitive" el
   let includeAttrib = vBool False $ getAttrValue "includeAttrib" el
+  -- characters from the rule's weakDeliminator attribute, stored on the rule
+  let weakDelim = Set.fromList $ T.unpack $ getAttrValue "weakDeliminator" el
   let lookahead = vBool False $ getAttrValue "lookAhead" el
   let firstNonSpace = vBool False $ getAttrValue "firstNonSpace" el
   let column' = getAttrValue "column" el
@@ -277,6 +279,7 @@ getParser casesensitive syntaxname itemdatas lists kwattr cattr el = do
                       then M.lookup cattr itemdatas
                       else M.lookup attribute itemdatas
                , rIncludeAttribute = includeAttrib
+               , rWeakDeliminators = weakDelim
                , rDynamic = dynamic
                , rCaseSensitive = not insensitive
                , rChildren = children
diff --git a/skylighting-core/src/Skylighting/Tokenizer.hs b/skylighting-core/src/Skylighting/Tokenizer.hs
index 50a8ddc0..f21376f0 100644
--- a/skylighting-core/src/Skylighting/Tokenizer.hs
+++ b/skylighting-core/src/Skylighting/Tokenizer.hs
@@ -334,7 +334,8 @@ tryRule rule inp = do
                                     stringDetect (rDynamic rule) (rCaseSensitive rule)
                                                  s inp
                 WordDetect s -> withAttr attr $
-                                    wordDetect (rCaseSensitive rule) s inp
+                                    wordDetect (rCaseSensitive rule)
+                                               (rWeakDeliminators rule) s inp
                 LineContinue -> withAttr attr $ lineContinue inp
                 DetectSpaces -> withAttr attr $ detectSpaces inp
                 DetectIdentifier -> withAttr attr $ detectIdentifier inp
@@ -380,9 +381,12 @@ withAttr tt p = do
      then return Nothing
      else return $ Just (tt, res)
 
-wordDetect :: Bool -> Text -> ByteString -> TokenizerM Text
-wordDetect caseSensitive s inp = do
-  wordBoundary inp
+-- | Match the word @s@ at the start of the input, subject to the
+-- word-boundary checks below.  Characters in @weakDelims@ are treated
+-- as word characters by those checks.
+wordDetect :: Bool -> Set.Set Char -> Text -> ByteString -> TokenizerM Text
+wordDetect caseSensitive weakDelims s inp = do
+  wordBoundary weakDelims inp
   t <- decodeBS $ UTF8.take (Text.length s) inp
   -- we assume here that the case fold will not change length,
   -- which is safe for ASCII keywords and the like...
@@ -395,7 +399,7 @@ wordDetect caseSensitive s inp = do
   let d = case UTF8.uncons rest of
                Nothing -> '\n'
                Just (x,_) -> x
-  guard $ isWordBoundary c d
+  guard $ isWordBoundary weakDelims c d
   takeChars (Text.length t)
 
 stringDetect :: Bool -> Bool -> Text -> ByteString -> TokenizerM Text
@@ -549,7 +553,7 @@ regExpr :: Bool -> RE -> ByteString -> TokenizerM Text
 regExpr dynamic re inp = do
   -- return $! traceShowId $! (reStr, inp)
   let reStr = reString re
-  when (BS.take 2 reStr == "\\b") $ wordBoundary inp
+  when (BS.take 2 reStr == "\\b") $ wordBoundary mempty inp
   regex <- case compileRE re of
              Right r -> return r
              Left e -> throwError $
@@ -569,16 +573,23 @@ regExpr dynamic re inp = do
 toSlice :: ByteString -> (Int, Int) -> ByteString
 toSlice bs (off, len) = BS.take len $ BS.drop off bs
 
-wordBoundary :: ByteString -> TokenizerM ()
-wordBoundary inp = do
+-- | Fail unless the previous character and the first character of the
+-- input lie on opposite sides of a word boundary; at end of input this
+-- always succeeds.  @weakDelims@ are counted as word characters.
+wordBoundary :: Set.Set Char -> ByteString -> TokenizerM ()
+wordBoundary weakDelims inp = do
   case UTF8.uncons inp of
        Nothing -> return ()
        Just (d, _) -> do
          c <- gets prevChar
-         guard $ isWordBoundary c d
+         guard $ isWordBoundary weakDelims c d
 
-isWordBoundary :: Char -> Char -> Bool
-isWordBoundary c d = isWordChar c /= isWordChar d
+-- | True if exactly one of the two characters is word-like, i.e.
+-- satisfies 'isWordChar' or is a member of @weakDelims@.
+isWordBoundary :: Set.Set Char -> Char -> Char -> Bool
+isWordBoundary weakDelims c d =
+  (isWordChar c || c `Set.member` weakDelims) /=
+  (isWordChar d || d `Set.member` weakDelims)
 
 decodeBS :: ByteString -> TokenizerM Text
 decodeBS bs = case decodeUtf8' bs of
@@ -658,7 +669,7 @@ pCChar = do
 
 parseInt :: ByteString -> TokenizerM Text
 parseInt inp = do
-  wordBoundary inp
+  wordBoundary mempty inp
   case A.parseOnly (A.match (pHex <|> pOct <|> pDec)) inp of
        Left _ -> mzero
        Right (r,_) -> takeChars (BS.length r) -- assumes ascii
@@ -670,7 +681,7 @@ pDec = do
 
 parseOct :: ByteString -> TokenizerM Text
 parseOct inp = do
-  wordBoundary inp
-  case A.parseOnly (A.match pHex) inp of
+  wordBoundary mempty inp
+  case A.parseOnly (A.match pOct) inp of
        Left _ -> mzero
        Right (r,_) -> takeChars (BS.length r) -- assumes ascii
@@ -685,7 +696,7 @@ pOct = do
 
 parseHex :: ByteString -> TokenizerM Text
 parseHex inp = do
-  wordBoundary inp
+  wordBoundary mempty inp
   case A.parseOnly (A.match pHex) inp of
        Left _ -> mzero
        Right (r,_) -> takeChars (BS.length r) -- assumes ascii
@@ -706,7 +717,7 @@ mbPlusMinus = () <$ A.satisfy (A.inClass "+-") <|> return ()
 
 parseFloat :: ByteString -> TokenizerM Text
 parseFloat inp = do
-  wordBoundary inp
+  wordBoundary mempty inp
   case A.parseOnly (A.match pFloat) inp of
        Left _ -> mzero
        Right (r,_) -> takeChars (BS.length r) -- assumes all ascii
diff --git a/skylighting-core/src/Skylighting/Types.hs b/skylighting-core/src/Skylighting/Types.hs
index 9d20b473..47d056f7 100644
--- a/skylighting-core/src/Skylighting/Types.hs
+++ b/skylighting-core/src/Skylighting/Types.hs
@@ -130,6 +130,8 @@ data Rule = Rule{
     rMatcher :: !Matcher
   , rAttribute :: !TokenType
   , rIncludeAttribute :: !Bool
+  -- characters the tokenizer counts as word characters for this rule
+  , rWeakDeliminators :: !(Set.Set Char)
   , rDynamic :: !Bool
   , rCaseSensitive :: !Bool
   , rChildren :: ![Rule]
diff --git a/skylighting-core/test/test-skylighting.hs b/skylighting-core/test/test-skylighting.hs
index a4f00c6e..b4923629 100644
--- a/skylighting-core/test/test-skylighting.hs
+++ b/skylighting-core/test/test-skylighting.hs
@@ -195,6 +195,16 @@ main = do
             @=? tokenize defConfig bash
                  "f() {\n echo > f\n}\n"
 
+      , testCase "C floating-point literal (#174)" $ Right
+          [ [ ( DataTypeTok , "double")
+            , ( NormalTok , " x " )
+            , ( OperatorTok , "=" )
+            , ( NormalTok , " " )
+            , ( FloatTok , "0.5")
+            , ( OperatorTok , ";" ) ] ]
+          @=? tokenize defConfig c
+               "double x = 0.5;\n"
+
       ]
     ]
 