Skip to content

Commit

Permalink
Docx reader: support task lists.
Browse files Browse the repository at this point in the history
This also fixes a small bug in parsing delimiters in numbered lists,
which led to the default delimiter being used wrongly in some cases.

Closes #8211.
  • Loading branch information
jgm committed Jun 4, 2024
1 parent e71f171 commit 3952d4d
Show file tree
Hide file tree
Showing 6 changed files with 55 additions and 26 deletions.
71 changes: 48 additions & 23 deletions src/Text/Pandoc/Readers/Docx/Lists.hs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{- |
Module : Text.Pandoc.Readers.Docx.Lists
Expand All @@ -18,6 +19,7 @@ module Text.Pandoc.Readers.Docx.Lists ( blocksToBullets
) where

import Data.List
import Data.Char (isDigit)
import Data.Maybe
import Data.String (fromString)
import qualified Data.Text as T
Expand Down Expand Up @@ -58,9 +60,9 @@ listStyleMap = [("upperLetter", UpperAlpha),
("decimal", Decimal)]

listDelimMap :: [(T.Text, ListNumberDelim)]
listDelimMap = [("%1)", OneParen),
("(%1)", TwoParens),
("%1.", Period)]
listDelimMap = [("%)", OneParen),
("(%)", TwoParens),
("%.", Period)]

getListType :: Block -> Maybe ListType
getListType b@(Div (_, _, kvs) _) | isListItem b =
Expand All @@ -72,7 +74,7 @@ getListType b@(Div (_, _, kvs) _) | isListItem b =
case frmt of
Just "bullet" -> Just Itemized
Just f ->
case txt of
case T.filter (not . isDigit) <$> txt of
Just t -> Just $ Enumerated (
fromMaybe 1 (start >>= safeRead) :: Int,
fromMaybe DefaultStyle (lookup f listStyleMap),
Expand Down Expand Up @@ -122,25 +124,48 @@ separateBlocks blks = foldr separateBlocks' [[]] (reverse blks)

flatToBullets' :: Integer -> [Block] -> [Block]
flatToBullets' _ [] = []
flatToBullets' num xs@(b : elems)
| getLevelN b == num = b : flatToBullets' num elems
| otherwise =
let bNumId = getNumIdN b
bLevel = getLevelN b
(children, remaining) =
span
(\b' ->
getLevelN b' > bLevel ||
(getLevelN b' == bLevel && getNumIdN b' == bNumId))
xs
in
case getListType b of
Just (Enumerated attr) ->
OrderedList attr (separateBlocks $ flatToBullets' bLevel children) :
flatToBullets' num remaining
_ ->
BulletList (separateBlocks $ flatToBullets' bLevel children) :
flatToBullets' num remaining
flatToBullets' num xs@(b : elems) =
if getLevelN b == num
then (case bCheckmark of
Just checked -> addCheckmark checked b
Nothing -> b) : flatToBullets' num elems
else case getListType b of
Just (Enumerated attr) ->
OrderedList attr (separateBlocks $ flatToBullets' bLevel children) :
flatToBullets' num remaining
_ ->
BulletList (separateBlocks $ flatToBullets' bLevel children) :
flatToBullets' num remaining
where
bNumId = getNumIdN b
bLevel = getLevelN b
isCheckmark (Just "\9744") = Just False
isCheckmark (Just "\9746") = Just True
isCheckmark _ = Nothing
bCheckmark =
case getListType b of
Just Itemized -> isCheckmark (getText b)
_ -> Nothing
addCheckmark checked (Div attrs [Para ils]) =
Div attrs [Para (Str (if checked then "\9746" else "\9744") : Space : ils)]
addCheckmark checked (Div attrs [Plain ils]) =
Div attrs [Plain (Str (if checked then "\9746" else "\9744") : Space : ils)]
addCheckmark _ x = x
(children, remaining) =
span
(\b' ->
getLevelN b' > bLevel ||
(getLevelN b' == bLevel &&
(getNumIdN b' == bNumId ||
(case bCheckmark of
Just _ ->
case getText b' of
Just "" -> True
Just " " -> True
Just x -> isJust (isCheckmark (Just x))
Nothing -> False
Nothing -> False))))
xs

flatToBullets :: [Block] -> [Block]
flatToBullets elems = flatToBullets' (-1) elems
Expand Down
4 changes: 4 additions & 0 deletions test/Tests/Readers/Docx.hs
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,10 @@ tests = [ testGroup "document"
"definition lists"
"docx/definition_list.docx"
"docx/definition_list.native"
, testCompare
"task lists"
"docx/task_list.docx"
"docx/task_list.native"
, testCompare
"custom defined lists in styles"
"docx/german_styled_lists.docx"
Expand Down
4 changes: 2 additions & 2 deletions test/docx/deep_normalize.native
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[OrderedList (1,Decimal,OneParen)
[[Para [Str "This",Space,Str "is",Space,Str "at",Space,Str "the",Space,Str "first",Space,Str "level"]
,OrderedList (1,LowerAlpha,DefaultDelim)
,OrderedList (1,LowerAlpha,OneParen)
[[Para [Str "This",Space,Str "is",Space,Str "at",Space,Str "the",Space,Str "second",Space,Str "level"]
,OrderedList (1,LowerRoman,DefaultDelim)
,OrderedList (1,LowerRoman,OneParen)
[[Para [Str "This",Space,Str "is",Space,Emph [Str "at",Space,Strong [Str "the",Space,Span ("",["mark"],[]) [Str "th"],Str "i",Span ("",["mark"],[]) [Str "rd"],Space,Str "level"],Str ",",Space,Str "and",Space,Str "I",Space,Str "want",Space,Str "to"],Space,Str "test",Space,Str "normalization",Space,Str "here."]]]]]]]]
Binary file modified test/docx/golden/lists.docx
Binary file not shown.
2 changes: 1 addition & 1 deletion test/docx/lists.native
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
,OrderedList (1,Decimal,Period)
[[Para [Str "one"]]
,[Para [Str "two"]
,OrderedList (1,LowerAlpha,DefaultDelim)
,OrderedList (1,LowerAlpha,Period)
[[Para [Str "a"]]
,[Para [Str "b"]]]]]
,BulletList
Expand Down
Binary file added test/docx/task_list.docx
Binary file not shown.

0 comments on commit 3952d4d

Please sign in to comment.