feat: Add line breaks after 80-120 chars (#295)

Co-authored-by: Konstantin <[email protected]>
Hochfrequenz · Nov 13, 2024 · e603fc1 · e603fc1
1 parent fa7a046
commit e603fc1
Show file tree

Hide file tree

Showing 12 changed files with 846 additions and 497 deletions.
diff --git a/src/rebdhuhn/graphviz.py b/src/rebdhuhn/graphviz.py
@@ -10,16 +10,20 @@
 from rebdhuhn.graph_utils import _mark_last_common_ancestors
 from rebdhuhn.kroki import DotToSvgConverter
 from rebdhuhn.models import DecisionNode, EbdGraph, EbdGraphEdge, EndNode, OutcomeNode, StartNode, ToNoEdge, ToYesEdge
+from rebdhuhn.utils import add_line_breaks
 
 ADD_INDENT = "    "  #: This is just for style purposes to make the plantuml files human-readable.
 
+_LABEL_MAX_LINE_LENGTH = 80
+
 
 def _format_label(label: str) -> str:
     """
-    Converts the given string e.g. a text for a node to a suitable output for dot. It replaces newlines (`\n`) with
-    the HTML-tag `<BR>`.
+    Converts the given string, e.g. the text of a node, to a suitable output for dot. It wraps the text using
+    `add_line_breaks` with `_LABEL_MAX_LINE_LENGTH`, escapes it and replaces newlines (`\n`) with `<BR>` tags.
     """
-    return escape(label).replace("\n", '<BR align="left"/>')
+    label_with_linebreaks = add_line_breaks(label, max_line_length=_LABEL_MAX_LINE_LENGTH, line_sep="\n")
+    return escape(label_with_linebreaks).replace("\n", '<BR align="left"/>')
     # escaped_str = re.sub(r"^(\d+): ", r"<B>\1: </B>", label)
     # escaped_str = label.replace("\n", '<BR align="left"/>')
     # return f'<{escaped_str}<BR align="left"/>>'

diff --git a/src/rebdhuhn/utils.py b/src/rebdhuhn/utils.py
@@ -0,0 +1,58 @@
+"""utility functions"""
+
+
+def _split_string(input_string: str, max_length: int) -> list[str]:
+    """
+    Splits the input string into multiple parts of roughly `max_length` characters each.
+    An existing line break within the first 1.5 * `max_length` characters is the preferred split position. Otherwise
+    the split occurs at the last space before `max_length` or, if there is no space, hard at `max_length`.
+    A rest of at most 12.5% of `max_length` is appended to the previous part (if that fits into 1.5 * `max_length`).
+
+    :param input_string: The string to be split.
+    :param max_length: The target length for each part; must be positive.
+    :return: A list of strings. Parts can be longer than `max_length`, up to about 1.5 * `max_length`.
+    :raises ValueError: If `max_length` is not positive (the splitting loop could not make progress then).
+    """
+    if max_length < 1:
+        raise ValueError(f"max_length must be positive but was {max_length}")
+    parts: list[str] = []
+    hurenkinder_length = int(0.125 * max_length)  # a rest up to this length is merged into the previous part
+    grace_length = int(1.5 * max_length)  # search window for existing line breaks and limit for merged parts
+    while len(input_string) > max_length:
+        split_index_line_break = input_string.find("\n", 0, grace_length)  # we prefer early line breaks
+        split_index_whitespace = input_string.rfind(" ", 0, max_length)  # but late white spaces
+        if split_index_line_break != -1:  # prefer this one
+            split_index = split_index_line_break
+        elif split_index_whitespace != -1:
+            split_index = split_index_whitespace
+        else:  # neither a line break nor a space was found: split hard at the max length
+            split_index = max_length
+        # No need to remove "\n" from the part: a line break split index is the first "\n", so the part has none.
+        part = input_string[:split_index].rstrip()
+        parts.append(part)
+        # Update the input_string to the remaining part
+        input_string = input_string[split_index:].lstrip()
+        if not input_string:
+            break  # only whitespace was left; merging it below would append a dangling " " to the last part
+        remaining_text_is_shorter_than_hurenkinder_threshold = len(input_string) <= hurenkinder_length
+        line_without_hurenkinder_within_grace_length = len(input_string) + len(part) <= grace_length
+        if remaining_text_is_shorter_than_hurenkinder_threshold and line_without_hurenkinder_within_grace_length:
+            parts[-1] += " " + input_string
+            input_string = ""
+            break
+    # Add the remaining string if any
+    if input_string:
+        parts.append(input_string)
+    return parts
+
+
+def add_line_breaks(text: str, max_line_length: int = 80, line_sep: str = "\n") -> str:
+    """
+    Joins the parts into which `_split_string` splits the text (roughly max_line_length characters each) by line_sep.
+    An existing line break within the first 1.5 * max_line_length characters of a line is reused instead of adding a
+    new one. This is because we cannot decide if an existing line break is just an artefact of the .docx files (e.g.
+    `...Bilanzierungs-\nverantwortung...`, a word break due to a limited column width) or if it has a functional
+    meaning (e.g. "Cluster Ablehnung:\n ...", where the line break structures the text in a good way).
+    """
+    lines = _split_string(text, max_line_length)
+    return line_sep.join(lines)
diff --git a/unittests/__snapshots__/test_table_to_graph.ambr b/unittests/__snapshots__/test_table_to_graph.ambr
diff --git a/unittests/output/E_0003.dot.svg b/unittests/output/E_0003.dot.svg
diff --git a/unittests/output/E_0003_with_watermark_background_is_False.dot.svg b/unittests/output/E_0003_with_watermark_background_is_False.dot.svg
diff --git a/unittests/output/E_0003_with_watermark_background_is_True.dot.svg b/unittests/output/E_0003_with_watermark_background_is_True.dot.svg
diff --git a/unittests/output/E_0003_without_watermark.dot.svg b/unittests/output/E_0003_without_watermark.dot.svg