Skip to content

Commit

Permalink
feat: Add line breaks after 80-120 chars (#295)
Browse files Browse the repository at this point in the history
Co-authored-by: Konstantin <[email protected]>
  • Loading branch information
hf-kklein and Konstantin authored Nov 13, 2024
1 parent fa7a046 commit e603fc1
Show file tree
Hide file tree
Showing 12 changed files with 846 additions and 497 deletions.
6 changes: 5 additions & 1 deletion src/rebdhuhn/graphviz.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,20 @@
from rebdhuhn.graph_utils import _mark_last_common_ancestors
from rebdhuhn.kroki import DotToSvgConverter
from rebdhuhn.models import DecisionNode, EbdGraph, EbdGraphEdge, EndNode, OutcomeNode, StartNode, ToNoEdge, ToYesEdge
from rebdhuhn.utils import add_line_breaks

ADD_INDENT = " " #: This is just for style purposes to make the plantuml files human-readable.

#: Maximum line length handed to ``add_line_breaks`` when the node labels are wrapped in ``_format_label``.
_LABEL_MAX_LINE_LENGTH = 80


def _format_label(label: str) -> str:
    """
    Converts the given string, e.g. the text of a node, to a suitable output for dot.

    Three steps are applied in this order:

    1. long lines are wrapped by ``add_line_breaks`` using ``_LABEL_MAX_LINE_LENGTH`` as maximum line length,
    2. the wrapped text is passed through ``escape``,
    3. every newline character is replaced with the HTML-tag ``<BR align="left"/>``.

    :param label: the raw label text
    :return: the label text ready to be embedded into a dot label
    """
    # The wrapping has to happen before escaping; otherwise the escape sequences would count towards the line length.
    label_with_linebreaks = add_line_breaks(label, max_line_length=_LABEL_MAX_LINE_LENGTH, line_sep="\n")
    return escape(label_with_linebreaks).replace("\n", '<BR align="left"/>')
Expand Down
58 changes: 58 additions & 0 deletions src/rebdhuhn/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""utility functions"""


def _split_string(input_string: str, max_length: int) -> list[str]:
"""
Splits the input string into multiple parts, each with a maximum length of `max_length`.
The split occurs at the last space before reaching the limit.
:param input_string: The string to be split.
:param max_length: The maximum length for each part (default is 80).
:return: A list of strings, each of length up to `max_length`.
"""
parts: list[str] = []
hurenkinder_length = int(0.125 * max_length)
grace_length = int(1.5 * max_length)
while len(input_string) > max_length:
# Find the last space before the max length
split_index_line_break = input_string.find("\n", 0, grace_length) # we prefer early line breaks
split_index_whitespace: int = input_string.rfind(" ", 0, max_length) # but late white spaces
split_index: int
# If no space is found, split at the max length
if split_index_line_break != -1: # prefer this one
split_index = split_index_line_break
elif split_index_whitespace != -1:
split_index = split_index_whitespace
else:
split_index = max_length
# Extract the part and append to the list
part: str = input_string[:split_index].rstrip()
if split_index_line_break != -1:
part = part.replace("\n", "")
parts.append(part)

# Update the input_string to the remaining part
input_string = input_string[split_index:].lstrip()
remaining_text_is_shorter_than_hurenkinder_threshold = len(input_string) <= hurenkinder_length
line_without_hurenkinder_within_grace_length = len(input_string) + len(part) <= grace_length
if remaining_text_is_shorter_than_hurenkinder_threshold and line_without_hurenkinder_within_grace_length:
parts[-1] += " " + input_string
input_string = ""
break
# Add the remaining string if any
if input_string:
parts.append(input_string)

return parts


def add_line_breaks(text: str, max_line_length: int = 80, line_sep: str = "\n") -> str:
    """
    Wraps the text by inserting `line_sep` between words once a line reaches about `max_line_length` characters.

    Line breaks that already exist in the text are preferred over new ones, as long as they occur within the
    max_line_length plus another max_line_length/2 characters. The reason is that we cannot decide whether an
    existing line break is just an artefact of the .docx files (e.g. a word break caused by a limited column width,
    like `...Bilanzierungs-\\nverantwortung...`) or whether it carries a functional meaning
    (like "Cluster Ablehnung:\\n ...", where the line break structures the text in a good way).

    :param text: the text to be wrapped
    :param max_line_length: the targeted maximum number of characters per line
    :param line_sep: the separator that is placed between the resulting lines
    :return: the resulting lines joined with `line_sep`
    """
    wrapped_lines = _split_string(text, max_line_length)
    return line_sep.join(wrapped_lines)
313 changes: 157 additions & 156 deletions unittests/__snapshots__/test_table_to_graph.ambr

Large diffs are not rendered by default.

25 changes: 24 additions & 1 deletion unittests/output/E_0003.dot.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
56 changes: 36 additions & 20 deletions unittests/output/E_0003_without_watermark.dot.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit e603fc1

Please sign in to comment.