diff --git a/src/stratigraphy/sidebar/a_above_b_sidebar.py b/src/stratigraphy/sidebar/a_above_b_sidebar.py index 1aa8158..7a13ad5 100644 --- a/src/stratigraphy/sidebar/a_above_b_sidebar.py +++ b/src/stratigraphy/sidebar/a_above_b_sidebar.py @@ -162,16 +162,16 @@ def identify_groups( Returns: list[IntervalBlockGroup]: A list of groups, where each group is a IntervalBlockGroup. - Example: + Example return value: [ - { - "depth_intervals": [BoundaryInterval(None, 0.1), BoundaryInterval(0.1, 0.3), ...], - "blocks": [DescriptionBlock(...), DescriptionBlock(...), ...] - }, - { - "depth_intervals": [BoundaryInterval(0.3, 0.7)], - "blocks": [DescriptionBlock(...), DescriptionBlock(...), ...] - }, + IntervalBlockGroup( + depth_intervals=[AAboveBInterval(None, 0.1), AAboveBInterval(0.1, 0.3), ...], + blocks=[TextBlock(...), TextBlock(...), ...] + ), + IntervalBlockGroup( + depth_intervals=[AAboveBInterval(0.3, 0.7)], + blocks=[TextBlock(...), TextBlock(...), ...] + ), ... ] """ diff --git a/src/stratigraphy/sidebar/a_above_b_sidebar_extractor.py b/src/stratigraphy/sidebar/a_above_b_sidebar_extractor.py index 6824596..a8391e0 100644 --- a/src/stratigraphy/sidebar/a_above_b_sidebar_extractor.py +++ b/src/stratigraphy/sidebar/a_above_b_sidebar_extractor.py @@ -15,15 +15,15 @@ class AAboveBSidebarExtractor: def find_in_words( all_words: list[TextWord], used_entry_rects: list[fitz.Rect], sidebar_params: dict ) -> list[AAboveBSidebar]: - """Construct all possible AAboveBSidebar objects from the given DepthColumnEntry objects. + """Construct all possible AAboveBSidebar objects from the given words. Args: all_words (list[TextLine]): All words in the page. used_entry_rects (list[fitz.Rect]): Part of the document to ignore. - sidebar_params (dict): Parameters for the BoundaryDepthColumn objects. + sidebar_params (dict): Parameters for the AAboveBSidebar objects. Returns: - list[AAboveBSidebar]: Found BoundaryDepthColumn objects. + list[AAboveBSidebar]: Found AAboveBSidebar objects. """ entries = [ entry @@ -57,10 +57,10 @@ def find_in_words( if all(not other.strictly_contains(column) for other in numeric_columns) ] - boundary_depth_column_validator = AAboveBSidebarValidator(all_words, **sidebar_params) + sidebar_validator = AAboveBSidebarValidator(all_words, **sidebar_params) numeric_columns = [ - boundary_depth_column_validator.reduce_until_valid(column) + sidebar_validator.reduce_until_valid(column) for numeric_column in numeric_columns for column in numeric_column.break_on_double_descending() # when we have a perfect arithmetic progression, this is usually just a scale @@ -69,6 +69,6 @@ def find_in_words( ] return sorted( - [column for column in numeric_columns if column and boundary_depth_column_validator.is_valid(column)], + [column for column in numeric_columns if column and sidebar_validator.is_valid(column)], key=lambda column: len(column.entries), ) diff --git a/src/stratigraphy/sidebar/a_above_b_sidebar_validator.py b/src/stratigraphy/sidebar/a_above_b_sidebar_validator.py index 0dfbfe9..3dbd012 100644 --- a/src/stratigraphy/sidebar/a_above_b_sidebar_validator.py +++ b/src/stratigraphy/sidebar/a_above_b_sidebar_validator.py @@ -104,7 +104,7 @@ def correct_OCR_mistakes(self, sidebar: AAboveBSidebar) -> AAboveBSidebar | None sidebar (AAboveBSidebar): The AAboveBSidebar to validate Returns: - BoundaryDepthColumn | None: The corrected depth column, or None if no correction was possible. + AAboveBSidebar | None: The corrected sidebar, or None if no correction was possible. """ new_columns = [AAboveBSidebar(entries=[])] for entry in sidebar.entries: diff --git a/src/stratigraphy/sidebar/layer_identifier_sidebar.py b/src/stratigraphy/sidebar/layer_identifier_sidebar.py index 40dde80..e2467cf 100644 --- a/src/stratigraphy/sidebar/layer_identifier_sidebar.py +++ b/src/stratigraphy/sidebar/layer_identifier_sidebar.py @@ -70,7 +70,7 @@ def identify_groups( result = [] for block in blocks: depth_intervals = [] - depth_interval = AToBInterval.get_depth_interval_from_textblock(block) + depth_interval = AToBInterval.get_depth_interval_from_lines(block.lines) if depth_interval: depth_intervals.append(depth_interval) result.append(IntervalBlockGroup(depth_intervals=depth_intervals, blocks=[block])) diff --git a/src/stratigraphy/util/interval.py b/src/stratigraphy/util/interval.py index 00a8d6f..479e8ff 100644 --- a/src/stratigraphy/util/interval.py +++ b/src/stratigraphy/util/interval.py @@ -179,22 +179,22 @@ def matching_blocks( return [] @classmethod - def get_depth_interval_from_textblock(cls, block: TextBlock) -> AToBInterval | None: - """Extract depth interval from a material description block. + def get_depth_interval_from_lines(cls, lines: list[TextLine]) -> AToBInterval | None: + """Extract depth interval from text lines. - For borehole profiles in the Deriaz layout, the depth interval is usually found in the text description - of the material. Often, these text descriptions contain a further separation into multiple sub layers. + For borehole profiles in the Deriaz layout, the depth interval is usually found in the text of the material + description. Often, these text descriptions contain a further separation into multiple sub layers. These sub layers have their own depth intervals. This function extracts the overall depth interval, spanning across all mentioned sub layers. Args: - block (TextBlock): The block to calculate the depth interval for. + lines (list[TextLine]): The lines to extract the depth interval from. Returns: - AToBInterval | None: The depth interval. + AToBInterval | None: The depth interval (if any) or None (if no depth interval was found). """ depth_entries = [] - for line in block.lines: + for line in lines: try: layer_depth_entry = AToBDepthColumnEntry.from_text(line.text, line.rect, require_start_of_string=False) # require_start_of_string = False because the depth interval may not always start at the beginning