Allow segmentations to reference multiple images #199

Open · wants to merge 14 commits into base: master
206 changes: 131 additions & 75 deletions src/highdicom/seg/sop.py
@@ -99,9 +99,14 @@ def __init__(
"""
Parameters
----------
-    source_images: Sequence[Dataset]
+    source_images: Sequence[pydicom.dataset.Dataset]
         One or more single- or multi-frame images (or metadata of images)
-        from which the segmentation was derived
+        from which the segmentation was derived. The images must have the
+        same dimensions (rows, columns) and orientation, have the same frame
+        of reference, and contain the same number of frames.
Member:
I believe in the general case, it is not required that the geometry of the images being segmented is consistent with the geometry of the segmentation. I was not sure if this constraint is here to simplify the implementation, which would make a lot of sense, or for some other reason.

Collaborator:

To clarify, this means that the source images must have the same "(rows, columns) and orientation, have the same frame of reference, and contain the same number of frames" as each other. We do handle the case where this differs from the geometry of the segmentation (through the plane_positions parameter)
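The kind of consistency constraint being discussed can be sketched in isolation. This is not highdicom's actual implementation; `images_are_consistent` is a hypothetical helper, and `SimpleNamespace` stands in for `pydicom.dataset.Dataset` (attribute names mirror DICOM keywords):

```python
from types import SimpleNamespace

def images_are_consistent(images):
    """Return True if all images share shape, orientation, frame of
    reference, and frame count (a sketch of the documented constraint)."""
    def key(img):
        return (
            img.Rows,
            img.Columns,
            tuple(getattr(img, 'ImageOrientationPatient', [])),
            getattr(img, 'FrameOfReferenceUID', None),
            int(getattr(img, 'NumberOfFrames', 1)),
        )
    # All images are consistent iff they collapse to a single key
    return len({key(img) for img in images}) <= 1

img_a = SimpleNamespace(Rows=512, Columns=512,
                        ImageOrientationPatient=[1, 0, 0, 0, 1, 0],
                        FrameOfReferenceUID='1.2.3', NumberOfFrames=1)
img_b = SimpleNamespace(Rows=256, Columns=256,
                        ImageOrientationPatient=[1, 0, 0, 0, 1, 0],
                        FrameOfReferenceUID='1.2.3', NumberOfFrames=1)

assert images_are_consistent([img_a, img_a])
assert not images_are_consistent([img_a, img_b])
```

Note that the segmentation's own geometry may still differ from this shared geometry, since the caller can pass plane positions explicitly.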

Member:

I see. I don't think that is required either by the standard (other than the same FoR).

Collaborator (Author):

The standard may not place any constraints on the referenced source images. However, in practice the constraints should generally apply. For example, all instances of a series of CT images generally have the same size and orientation.

If this should ever become a problem in practice, we can further relax the constraints. However, this would further complicate mapping segmentation frames to source image frames.

Member (@fedorov, Oct 12, 2022):

This all makes sense. I want to make sure it is clear that I am not pushing for those features, I just wanted to raise awareness.

Those are not hypothetical situations, however. In practice, it is common for different series within a single MR study (in prostate and brain imaging, to give a few examples) to have varying resolution/orientation.

Collaborator (Author):

Thanks @fedorov. That's a good point. In the short term, I think it's unlikely that we will be able to support such use cases. However, we should consider them for the API design.

Collaborator:

Yes, as we open ourselves up to having multiple source series, this becomes relevant as different series may have different values for these attributes. Previously this test was there since we were assuming that they were consistent within a series.

I think at the very least we would still want these checks:

  • Within a series, and
  • Within all images if the user has not provided plane positions for the segmentation
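The two-tier rule proposed above could look roughly like this. It is purely illustrative (`check_source_image_geometry` is a hypothetical helper, not code from this PR): geometry is always enforced within each series, and across all series only when the caller supplies no plane positions for the segmentation:

```python
from collections import defaultdict
from types import SimpleNamespace

def check_source_image_geometry(images, plane_positions=None):
    """Enforce geometric consistency within each series, and across all
    images only when no plane positions were provided (sketch)."""
    def key(img):
        return (img.Rows, img.Columns,
                tuple(img.ImageOrientationPatient),
                img.FrameOfReferenceUID)

    # Group images by their series
    by_series = defaultdict(list)
    for img in images:
        by_series[img.SeriesInstanceUID].append(img)

    # Tier 1: consistency within each series
    for uid, series_images in by_series.items():
        if len({key(i) for i in series_images}) > 1:
            raise ValueError(f'Inconsistent geometry within series {uid}.')

    # Tier 2: global consistency, only without explicit plane positions
    if plane_positions is None and len({key(i) for i in images}) > 1:
        raise ValueError(
            'Source series have differing geometry; plane positions for '
            'the segmentation must be provided explicitly.'
        )

a = SimpleNamespace(SeriesInstanceUID='1.1', Rows=512, Columns=512,
                    ImageOrientationPatient=[1, 0, 0, 0, 1, 0],
                    FrameOfReferenceUID='1.2.3')
b = SimpleNamespace(SeriesInstanceUID='1.2', Rows=256, Columns=256,
                    ImageOrientationPatient=[1, 0, 0, 0, 1, 0],
                    FrameOfReferenceUID='1.2.3')

check_source_image_geometry([a, a])                     # passes: one series
check_source_image_geometry([a, b], plane_positions=[]) # passes: per-series only
```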

        In case of multi-frame images that are tiled (e.g., VL Whole Slide
        Microscopy Image instances), the images may be from multiple
        series as long as the other requirements are satisfied.
pixel_array: numpy.ndarray
Array of segmentation pixel data of boolean, unsigned integer or
floating point data type representing a mask image. The array may
@@ -274,29 +279,52 @@ def __init__(
if len(source_images) == 0:
raise ValueError('At least one source image is required.')

unique_sop_instance_uids = set(
[image.SOPInstanceUID for image in source_images]
)
if len(source_images) != len(unique_sop_instance_uids):
raise ValueError(
'Source images must all have a unique SOP Instance UID.'
)

uniqueness_criteria = set(
(
image.StudyInstanceUID,
image.SeriesInstanceUID,
image.Rows,
image.Columns,
int(getattr(image, 'NumberOfFrames', '1')),
hasattr(image, 'FrameOfReferenceUID'),
getattr(image, 'FrameOfReferenceUID', None),
hasattr(image, 'TotalPixelMatrixRows'),
getattr(image, 'TotalPixelMatrixRows', None),
hasattr(image, 'TotalPixelMatrixColumns'),
getattr(image, 'TotalPixelMatrixColumns', None),
hasattr(image, 'TotalPixelMatrixFocalPlanes'),
getattr(image, 'TotalPixelMatrixFocalPlanes', None),
tuple(getattr(image, 'ImageOrientationPatient', [])),
tuple(getattr(image, 'ImageOrientationSlide', [])),
hasattr(image, 'DimensionOrganizationType'),
getattr(image, 'DimensionOrganizationType', None),
len(getattr(image, 'PerFrameFunctionalGroupsSequence', [])),
len(getattr(image, 'SharedFunctionalGroupsSequence', [])),
)
for image in source_images
)
if len(uniqueness_criteria) > 1:
raise ValueError(
-                'Source images must all be part of the same series and must '
-                'have the same image dimensions (number of rows/columns).'
+                'Source images must all have the same image dimensions '
+                '(number of rows/columns) and image orientation, '
+                'have the same frame of reference, '
+                'and contain the same number of frames.'
)

if pixel_array.ndim == 2:
pixel_array = pixel_array[np.newaxis, ...]
if pixel_array.ndim not in [3, 4]:
raise ValueError('Pixel array must be a 2D, 3D, or 4D array.')

src_img = source_images[0]
is_multiframe = hasattr(src_img, 'NumberOfFrames')
if is_multiframe and len(source_images) > 1:
raise ValueError(
'Only one source image should be provided in case images '
'are multi-frame images.'
)
is_tiled = hasattr(src_img, 'TotalPixelMatrixRows')
supported_transfer_syntaxes = {
ImplicitVRLittleEndian,
@@ -310,11 +338,6 @@ def __init__(
f'Transfer syntax "{transfer_syntax_uid}" is not supported.'
)

if pixel_array.ndim == 2:
pixel_array = pixel_array[np.newaxis, ...]
if pixel_array.ndim not in [3, 4]:
raise ValueError('Pixel array must be a 2D, 3D, or 4D array.')

super().__init__(
study_instance_uid=src_img.StudyInstanceUID,
series_instance_uid=series_instance_uid,
@@ -342,6 +365,55 @@ def __init__(
**kwargs
)

# General Reference
self.SourceImageSequence: List[Dataset] = []
referenced_series: Dict[str, List[Dataset]] = defaultdict(list)
for s_img in source_images:
ref = Dataset()
ref.ReferencedSOPClassUID = s_img.SOPClassUID
ref.ReferencedSOPInstanceUID = s_img.SOPInstanceUID
self.SourceImageSequence.append(ref)
referenced_series[s_img.SeriesInstanceUID].append(ref)

if len(referenced_series) > 1:
if is_multiframe and not is_tiled:
raise ValueError(
'If source images are multiple-frame images that are '
'not tiled, then only a single source image from a single '
'series must be provided.'
)
Collaborator:

I really don't understand why there is different behaviour for tiled non-tiled. Can you please explain?

Collaborator (Author):

My thinking goes as follows: If a multi-frame image is not tiled, then it likely represents an Enhanced CT/MR/PET Image and I wasn't sure how that should be handled. Would we need to check whether the number of frames in the image (or in each of the images, if multiple were allowed) matches the first dimension of pixel_array?

Collaborator (Author):

You are probably right and we can just handle tiled and non-tiled the same way.

Collaborator (Author):

Addressed via a2c0d1d

Collaborator (Author):

For consistency, we should now also support source_images: Sequence[Sequence[pydicom.Dataset]] to allow users to specify multiple series of single-frame images. However, that will require further thought and work.
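The suggested nested signature could be normalized internally along these lines. This is not part of the PR, just a sketch; `normalize_source_images` is a hypothetical helper name, and strings stand in for `pydicom.Dataset` instances:

```python
def normalize_source_images(source_images):
    """Flatten a sequence of images, or a sequence of per-series
    sequences of images, into a single flat list (sketch)."""
    # A nested sequence is detected by its first element being itself
    # a list/tuple rather than a Dataset-like object.
    if source_images and isinstance(source_images[0], (list, tuple)):
        return [img for series in source_images for img in series]
    return list(source_images)

# Nested form: two series of single-frame images
assert normalize_source_images([['a', 'b'], ['c']]) == ['a', 'b', 'c']
# Flat form is passed through unchanged
assert normalize_source_images(['a', 'b']) == ['a', 'b']
```

With such a helper, the rest of the constructor could operate on a flat list regardless of which form the caller provided.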

elif not is_multiframe:
raise ValueError(
'If source images are single-frame images, then all '
'source images must be from a single series.'
)

# Common Instance Reference
self.ReferencedSeriesSequence: List[Dataset] = []
for series_instance_uid, referenced_images in referenced_series.items():
if is_multiframe and not is_tiled:
if len(referenced_images) > 1:
raise ValueError(
'If source images are multiple-frame images that are '
'not tiled, then only a single source image must be '
f'provided. However, n={len(referenced_images)} images '
f'were provided for series "{series_instance_uid}".'
)
elif not is_multiframe:
if len(referenced_images) != pixel_array.shape[0]:
raise ValueError(
                        'If source images are single-frame images, then '
                        f'n={pixel_array.shape[0]} source images must be '
                        'provided per series. '
f'However, n={len(referenced_images)} images were '
f'provided for series "{series_instance_uid}".'
)

ref = Dataset()
ref.SeriesInstanceUID = series_instance_uid
ref.ReferencedInstanceSequence = list(referenced_images)
self.ReferencedSeriesSequence.append(ref)

# Frame of Reference
has_ref_frame_uid = hasattr(src_img, 'FrameOfReferenceUID')
if has_ref_frame_uid:
@@ -395,24 +467,6 @@ def __init__(
)
self._coordinate_system = None

# General Reference
self.SourceImageSequence: List[Dataset] = []
referenced_series: Dict[str, List[Dataset]] = defaultdict(list)
for s_img in source_images:
ref = Dataset()
ref.ReferencedSOPClassUID = s_img.SOPClassUID
ref.ReferencedSOPInstanceUID = s_img.SOPInstanceUID
self.SourceImageSequence.append(ref)
referenced_series[s_img.SeriesInstanceUID].append(ref)

# Common Instance Reference
self.ReferencedSeriesSequence: List[Dataset] = []
for series_instance_uid, referenced_images in referenced_series.items():
ref = Dataset()
ref.SeriesInstanceUID = series_instance_uid
ref.ReferencedInstanceSequence = referenced_images
self.ReferencedSeriesSequence.append(ref)

# Image Pixel
self.Rows = pixel_array.shape[1]
self.Columns = pixel_array.shape[2]
@@ -742,6 +796,8 @@ def __init__(
# bitpacking at the end
full_pixel_array = np.array([], np.bool_)

derivation_code = codes.cid7203.Segmentation
purpose_code = codes.cid7202.SourceImageForImageProcessingOperation
for i, segment_number in enumerate(described_segment_numbers):
# Pixel array for just this segment
if pixel_array.dtype in (np.float_, np.float32, np.float64):
@@ -787,17 +843,11 @@ def __init__(
# absent. Such frames should be removed
if omit_empty_frames and np.sum(planes[j]) == 0:
logger.info(
-                        'skip empty plane {} of segment #{}'.format(
-                            j, segment_number
-                        )
+                        f'skip empty plane {j} of segment #{segment_number}'
)
continue
contained_plane_index.append(j)
-                logger.info(
-                    'add plane #{} for segment #{}'.format(
-                        j, segment_number
-                    )
-                )
+                logger.info(f'add plane #{j} for segment #{segment_number}')

pffp_item = Dataset()
frame_content_item = Dataset()
@@ -837,9 +887,9 @@ def __init__(
]
except IndexError as error:
raise IndexError(
-                            'Could not determine position of plane #{} in '
+                            f'Could not determine position of plane #{j} in '
                             'three dimensional coordinate system based on '
-                            'dimension index values: {}'.format(j, error)
+                            f'dimension index values: {error}'
)
frame_content_item.DimensionIndexValues = (
[segment_number] + index_values
@@ -858,43 +908,49 @@ def __init__(
pffp_item.DerivationImageSequence = []

if are_spatial_locations_preserved:
-                        derivation_image_item = Dataset()
-                        derivation_code = codes.cid7203.Segmentation
-                        derivation_image_item.DerivationCodeSequence = [
+                        derivation_img_item = Dataset()
+                        derivation_img_item.DerivationCodeSequence = [
CodedConcept.from_code(derivation_code)
]

-                        derivation_src_img_item = Dataset()
-                        if hasattr(source_images[0], 'NumberOfFrames'):
-                            # A single multi-frame source image
-                            src_img_item = self.SourceImageSequence[0]
-                            # Frame numbers are one-based
-                            derivation_src_img_item.ReferencedFrameNumber = (
-                                source_image_index + 1
-                            )
-                        else:
-                            # Multiple single-frame source images
-                            src_img_item = self.SourceImageSequence[
-                                source_image_index
-                            ]
-                        derivation_src_img_item.ReferencedSOPClassUID = \
-                            src_img_item.ReferencedSOPClassUID
-                        derivation_src_img_item.ReferencedSOPInstanceUID = \
-                            src_img_item.ReferencedSOPInstanceUID
-                        purpose_code = \
-                            codes.cid7202.SourceImageForImageProcessingOperation
-                        derivation_src_img_item.PurposeOfReferenceCodeSequence = [
-                            CodedConcept.from_code(purpose_code)
-                        ]
-                        derivation_src_img_item.SpatialLocationsPreserved = 'YES'
-                        derivation_image_item.SourceImageSequence = [
-                            derivation_src_img_item,
-                        ]
+                        derivation_img_item.SourceImageSequence = []

for _, referenced_images in referenced_series.items():
if is_multiframe:
for src_item in referenced_images:
drv_src_item = Dataset()
drv_src_item.ReferencedFrameNumber = (
source_image_index + 1
)
drv_src_item.ReferencedSOPClassUID = \
src_item.ReferencedSOPClassUID
drv_src_item.ReferencedSOPInstanceUID = \
src_item.ReferencedSOPInstanceUID
drv_src_item.PurposeOfReferenceCodeSequence = [
CodedConcept.from_code(purpose_code)
]
drv_src_item.SpatialLocationsPreserved = 'YES'
derivation_img_item.SourceImageSequence.append(
drv_src_item
)
else:
src_item = referenced_images[source_image_index]
drv_src_item = Dataset()
drv_src_item.ReferencedSOPClassUID = \
src_item.ReferencedSOPClassUID
drv_src_item.ReferencedSOPInstanceUID = \
src_item.ReferencedSOPInstanceUID
drv_src_item.PurposeOfReferenceCodeSequence = [
CodedConcept.from_code(purpose_code)
]
drv_src_item.SpatialLocationsPreserved = 'YES'
derivation_img_item.SourceImageSequence.append(
drv_src_item
)
pffp_item.DerivationImageSequence.append(
-                                derivation_image_item
+                                derivation_img_item
)
else:
-                        logger.warning('spatial locations not preserved')
+                        logger.warning('spatial locations are not preserved')

identification = Dataset()
identification.ReferencedSegmentNumber = segment_number