Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add line breaks after 80-120 chars #295

Merged
merged 6 commits into from
Nov 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/rebdhuhn/graphviz.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,16 +10,20 @@
from rebdhuhn.graph_utils import _mark_last_common_ancestors
from rebdhuhn.kroki import DotToSvgConverter
from rebdhuhn.models import DecisionNode, EbdGraph, EbdGraphEdge, EndNode, OutcomeNode, StartNode, ToNoEdge, ToYesEdge
from rebdhuhn.utils import add_line_breaks

ADD_INDENT = " " #: This is just for style purposes to make the plantuml files human-readable.
# NOTE(review): this constant is defined in graphviz.py; "plantuml" in the comment above may be a copy-paste
# leftover - confirm.

#: Line length handed to add_line_breaks (as max_line_length) when node labels are formatted.
_LABEL_MAX_LINE_LENGTH = 80


def _format_label(label: str) -> str:
    """
    Converts the given string e.g. a text for a node to a suitable output for dot.

    The label is first wrapped by `add_line_breaks` using `_LABEL_MAX_LINE_LENGTH` as maximum line length. The
    wrapped text is then passed through `escape` and every newline character is replaced with the left-aligned
    HTML-tag `<BR align="left"/>`.

    :param label: the raw label text
    :return: the wrapped label in which all newlines are replaced by `<BR align="left"/>`
    """
    # Wrap before escaping so that the line length is measured on the text as written, not on escaped entities.
    label_with_linebreaks = add_line_breaks(label, max_line_length=_LABEL_MAX_LINE_LENGTH, line_sep="\n")
    return escape(label_with_linebreaks).replace("\n", '<BR align="left"/>')
Expand Down
58 changes: 58 additions & 0 deletions src/rebdhuhn/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
"""utility functions"""


def _split_string(input_string: str, max_length: int) -> list[str]:
"""
Splits the input string into multiple parts, each with a maximum length of `max_length`.
The split occurs at the last space before reaching the limit.

:param input_string: The string to be split.
:param max_length: The maximum length for each part (default is 80).
:return: A list of strings, each of length up to `max_length`.
"""
parts: list[str] = []
hurenkinder_length = int(0.125 * max_length)
grace_length = int(1.5 * max_length)
while len(input_string) > max_length:
# Find the last space before the max length
split_index_line_break = input_string.find("\n", 0, grace_length) # we prefer early line breaks
split_index_whitespace: int = input_string.rfind(" ", 0, max_length) # but late white spaces
split_index: int
# If no space is found, split at the max length
if split_index_line_break != -1: # prefer this one
split_index = split_index_line_break
elif split_index_whitespace != -1:
split_index = split_index_whitespace
else:
split_index = max_length
# Extract the part and append to the list
part: str = input_string[:split_index].rstrip()
if split_index_line_break != -1:
part = part.replace("\n", "")
parts.append(part)

# Update the input_string to the remaining part
input_string = input_string[split_index:].lstrip()
remaining_text_is_shorter_than_hurenkinder_threshold = len(input_string) <= hurenkinder_length
line_without_hurenkinder_within_grace_length = len(input_string) + len(part) <= grace_length
if remaining_text_is_shorter_than_hurenkinder_threshold and line_without_hurenkinder_within_grace_length:
parts[-1] += " " + input_string
input_string = ""
break
# Add the remaining string if any
if input_string:
parts.append(input_string)

return parts


def add_line_breaks(text: str, max_line_length: int = 80, line_sep: str = "\n") -> str:
    """
    Wraps the text at about max_line_length characters and joins the resulting lines with line_sep.

    The splitting itself is delegated to `_split_string`. Existing line breaks are preferred over newly introduced
    ones, because we cannot decide whether an existing line break is just an artefact of the .docx files (e.g. a
    word break caused by a limited column width, as in `...Bilanzierungs-\\nverantwortung...`) or whether it has a
    functional meaning (as in "Cluster Ablehnung:\\n ...", where the line break structures the text in a good way).

    :param text: the text to wrap
    :param max_line_length: the targeted maximum length of a line
    :param line_sep: the separator that is placed between the lines
    :return: the lines joined by line_sep
    """
    wrapped_lines = _split_string(text, max_line_length)
    return line_sep.join(wrapped_lines)
313 changes: 157 additions & 156 deletions unittests/__snapshots__/test_table_to_graph.ambr

Large diffs are not rendered by default.

25 changes: 24 additions & 1 deletion unittests/output/E_0003.dot.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
56 changes: 36 additions & 20 deletions unittests/output/E_0003_without_watermark.dot.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading