Source code for aicsimageio.readers.ome_tiff_reader

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import logging
import xml.etree.ElementTree as ET
from typing import Any, Dict, List, Optional, Tuple, Union
from urllib.error import URLError

import xarray as xr
from fsspec.implementations.local import LocalFileSystem
from fsspec.spec import AbstractFileSystem
from ome_types import OME, from_xml
from pydantic import ValidationError
from tifffile.tifffile import TiffFile, TiffFileError, TiffTags
from xmlschema import XMLSchemaValidationError
from xmlschema.exceptions import XMLSchemaValueError

from .. import constants, exceptions, transforms, types
from ..dimensions import (
    DEFAULT_CHUNK_DIMS,
    DEFAULT_DIMENSION_ORDER,
    DEFAULT_DIMENSION_ORDER_WITH_SAMPLES,
    DimensionNames,
)
from ..metadata import utils as metadata_utils
from ..types import PhysicalPixelSizes
from ..utils import io_utils
from .tiff_reader import TiffReader

###############################################################################

log = logging.getLogger(__name__)

###############################################################################


[docs]class OmeTiffReader(TiffReader):
    """
    Wraps the tifffile and ome-types APIs to provide the same aicsimageio Reader
    API but for volumetric OME-TIFF images.
    Parameters
    ----------
    image: types.PathLike
        Path to image file to construct Reader for.
    chunk_dims: List[str]
        Which dimensions to create chunks for.
        Default: DEFAULT_CHUNK_DIMS
        Note: Dimensions.SpatialY, Dimensions.SpatialX, and DimensionNames.Samples,
        will always be added to the list if not present during dask array
        construction.
    clean_metadata: bool
        Should the OME XML metadata found in the file be cleaned for known
        AICSImageIO 3.x and earlier created errors.
        Default: True (Clean the metadata for known errors)
    fs_kwargs: Dict[str, Any]
        Any specific keyword arguments to pass down to the fsspec created filesystem.
        Default: {}
    Notes
    -----
    If the OME metadata in your file isn't OME schema compilant or does not validate
    this will fail to read your file and raise an exception.
    If the OME metadata in your file doesn't use the latest OME schema (2016-06),
    this reader will make a request to the referenced remote OME schema to validate.
    """

    @staticmethod
    def _get_ome(ome_xml: str, clean_metadata: bool = True) -> OME:
        # To clean or not to clean, that is the question
        if clean_metadata:
            ome_xml = metadata_utils.clean_ome_xml_for_known_issues(ome_xml)

        return from_xml(ome_xml, parser="lxml")

    @staticmethod
    def _is_supported_image(
        fs: AbstractFileSystem, path: str, clean_metadata: bool = True, **kwargs: Any
    ) -> bool:
        try:
            with fs.open(path) as open_resource:
                with TiffFile(open_resource) as tiff:
                    # Get first page description (aka the description tag in general)
                    # after Tifffile version 2023.3.15 mmstack images read all scenes
                    # into tiff.pages[0]
                    xml = tiff.pages[0].description
                    ome = OmeTiffReader._get_ome(xml, clean_metadata)

                    # Handle no images in metadata
                    # this commonly means it is a "BinaryData" OME file
                    # i.e. a non-main OME-TIFF from MicroManager or similar
                    # in this case, because it's not the main file we want to just role
                    # back to TiffReader
                    if ome.binary_only:
                        return False

                    return True

        # tifffile exceptions
        except (TiffFileError, TypeError):
            return False

        # xml parse errors
        except ET.ParseError as e:
            log.debug(f"Failed to parse XML for the provided file. Error: {e}")
            return False

        # invalid OME XMl
        except (XMLSchemaValueError, XMLSchemaValidationError, ValidationError) as e:
            log.debug(f"OME XML validation failed. Error: {e}")
            return False

        # cant connect to external schema resource (no internet conection)
        except URLError as e:
            log.debug(
                f"Could not validate OME XML against referenced schema "
                f"(no internet connection). "
                f"Error: {e}"
            )
            return False

        except Exception as e:
            log.debug(f"Unhandled exception: {e}")
            return False

    @staticmethod
    def _guess_ome_dim_order(tiff: TiffFile, ome: OME, scene_index: int) -> List[str]:
        """
        Guess the dimension order based on OME metadata and actual TIFF data.
        Parameters
        -------
        tiff: TiffFile
            A constructed TIFF object to retrieve data from.
        ome: OME
            A constructed OME object to retrieve data from.
        scene_index: int
            The current operating scene index to pull metadata from.
        Returns
        -------
        dims: List[str]
            Educated guess of the dimension order for the file
        """
        dims_from_ome = metadata_utils.get_dims_from_ome(ome, scene_index)

        # Assumes the dimensions coming from here are align semantically
        # with the dimensions specified in this package. Possible T dimension
        # is not equivalent to T dimension here. However, any dimensions
        # not also found in OME will be omitted.
        dims_from_tiff_axes = list(tiff.series[scene_index].axes)

        # Adjust the guess of what the dimensions are based on the combined
        # information from the tiff axes and the OME metadata.
        # Necessary since while OME metadata should be source of truth, it
        # does not provide enough data to guess which dimension is Samples
        # for RGB files
        dims = [dim for dim in dims_from_ome if dim not in dims_from_tiff_axes]
        dims += [dim for dim in dims_from_tiff_axes if dim in dims_from_ome]
        return dims

[docs]    def __init__(
        self,
        image: types.PathLike,
        chunk_dims: Union[str, List[str]] = DEFAULT_CHUNK_DIMS,
        clean_metadata: bool = True,
        fs_kwargs: Dict[str, Any] = {},
        **kwargs: Any,
    ):
        # Expand details of provided image
        self._fs, self._path = io_utils.pathlike_to_fs(
            image,
            enforce_exists=True,
            fs_kwargs=fs_kwargs,
        )

        # Store params
        if isinstance(chunk_dims, str):
            chunk_dims = list(chunk_dims)

        self.chunk_dims = chunk_dims
        self.clean_metadata = clean_metadata

        # Enforce valid image
        if not self._is_supported_image(self._fs, self._path, clean_metadata):
            raise exceptions.UnsupportedFileFormatError(
                self.__class__.__name__, self._path
            )

        # Get ome-types object and warn of other behaviors
        with self._fs.open(self._path) as open_resource:
            with TiffFile(open_resource) as tiff:
                # Get and store OME
                self._ome = self._get_ome(
                    tiff.pages[0].description, self.clean_metadata
                )

                # Get and store scenes
                self._scenes: Tuple[str, ...] = tuple(
                    image_meta.id for image_meta in self._ome.images
                )

                # Log a warning stating that if this is a MM OME-TIFF, don't read
                # many series
                if tiff.is_micromanager and not isinstance(self._fs, LocalFileSystem):
                    log.warning(
                        "**Remote reading** (S3, GCS, HTTPS, etc.) of multi-image "
                        "(or scene) OME-TIFFs created by MicroManager has limited "
                        "support with the scene API. "
                        "It is recommended to use independent AICSImage or Reader "
                        "objects for each remote file instead of the `set_scene` API. "
                        "Track progress on support here: "
                        "https://github.com/AllenCellModeling/aicsimageio/issues/196"
                    )

    @property
    def scenes(self) -> Tuple[str, ...]:
        return self._scenes

    @staticmethod
    def _expand_dims_to_match_ome(
        image_data: types.ArrayLike,
        ome: OME,
        dims: List[str],
        scene_index: int,
    ) -> types.ArrayLike:
        # Expand image_data for empty dimensions
        ome_shape = []

        # need to correct channel count if this is a RGB image
        n_samples = ome.images[scene_index].pixels.channels[0].samples_per_pixel
        has_multiple_samples = n_samples is not None and n_samples > 1
        for d in dims:
            # SizeC can represent RGB (Samples) data rather
            # than channel data, whether or not this is the case depends
            # on what the SamplesPerPixel are for the channel
            if d == "C" and has_multiple_samples:
                count = len(ome.images[scene_index].pixels.channels)
            elif d == "S" and has_multiple_samples:
                count = n_samples
            else:
                count = getattr(ome.images[scene_index].pixels, f"size_{d.lower()}")
            ome_shape.append(count)

        # The file may not have all the data but OME requires certain dimensions
        # expand to fill
        expand_dim_ops: List[Optional[slice]] = []
        for d_size in ome_shape:
            # Add empty dimension where OME requires dimension but no data exists
            if d_size == 1:
                expand_dim_ops.append(None)
            # Add full slice where data exists
            else:
                expand_dim_ops.append(slice(None, None, None))

        # Apply operators to dask array
        return image_data[tuple(expand_dim_ops)]

    def _general_data_array_constructor(
        self,
        image_data: types.ArrayLike,
        dims: List[str],
        coords: Dict[str, Union[List[Any], types.ArrayLike]],
        tiff_tags: TiffTags,
    ) -> xr.DataArray:
        # Expand the image data to match the OME empty dimensions
        image_data = self._expand_dims_to_match_ome(
            image_data=image_data,
            ome=self._ome,
            dims=dims,
            scene_index=self.current_scene_index,
        )

        # Always order array
        if DimensionNames.Samples in dims:
            out_order = DEFAULT_DIMENSION_ORDER_WITH_SAMPLES
        else:
            out_order = DEFAULT_DIMENSION_ORDER

        # Transform into order
        image_data = transforms.reshape_data(
            image_data,
            "".join(dims),
            out_order,
        )

        # Reset dims after transform
        dims = [d for d in out_order]

        return xr.DataArray(
            image_data,
            dims=dims,
            coords=coords,
            attrs={
                constants.METADATA_UNPROCESSED: tiff_tags,
                constants.METADATA_PROCESSED: self._ome,
            },
        )

    def _read_delayed(self) -> xr.DataArray:
        """
        Construct the delayed xarray DataArray object for the image.
        Returns
        -------
        image: xr.DataArray
            The fully constructed and fully delayed image as a DataArray object.
            Metadata is attached in some cases as coords, dims, and attrs contains
            unprocessed tags and processed OME object.
        Raises
        ------
        exceptions.UnsupportedFileFormatError
            The file could not be read or is not supported.
        """
        with self._fs.open(self._path) as open_resource:
            with TiffFile(open_resource) as tiff:
                # Get unprocessed metadata from tags
                tiff_tags = self._get_tiff_tags(tiff)

                # Unpack coords from OME
                coords = metadata_utils.get_coords_from_ome(
                    ome=self._ome,
                    scene_index=self.current_scene_index,
                )

                # Guess the dim order based on metadata and actual tiff data
                dims = OmeTiffReader._guess_ome_dim_order(
                    tiff, self._ome, self.current_scene_index
                )

                # Grab the tifffile axes to use for dask array construction
                # If any of the non-"standard" dims are present
                # they will be filtered out during later reshape data calls
                strictly_read_dims = list(tiff.series[self.current_scene_index].axes)

                # Create the delayed dask array
                image_data = self._create_dask_array(tiff, strictly_read_dims)

                return self._general_data_array_constructor(
                    image_data,
                    dims,
                    coords,
                    tiff_tags,
                )

    def _read_immediate(self) -> xr.DataArray:
        """
        Construct the in-memory xarray DataArray object for the image.
        Returns
        -------
        image: xr.DataArray
            The fully constructed and fully read into memory image as a DataArray
            object. Metadata is attached in some cases as coords, dims, and attrs
            contains unprocessed tags and processed OME object.
        Raises
        ------
        exceptions.UnsupportedFileFormatError
            The file could not be read or is not supported.
        """
        with self._fs.open(self._path) as open_resource:
            with TiffFile(open_resource) as tiff:
                # Get unprocessed metadata from tags
                tiff_tags = self._get_tiff_tags(tiff)

                # Unpack coords from OME
                coords = metadata_utils.get_coords_from_ome(
                    ome=self._ome,
                    scene_index=self.current_scene_index,
                )

                # Guess the dim order based on metadata and actual tiff data
                dims = OmeTiffReader._guess_ome_dim_order(
                    tiff, self._ome, self.current_scene_index
                )

                # Read image into memory
                image_data = tiff.series[self.current_scene_index].asarray()

                return self._general_data_array_constructor(
                    image_data,
                    dims,
                    coords,
                    tiff_tags,
                )

    @property
    def ome_metadata(self) -> OME:
        return self.metadata

    @property
    def physical_pixel_sizes(self) -> PhysicalPixelSizes:
        """
        Returns
        -------
        sizes: PhysicalPixelSizes
            Using available metadata, the floats representing physical pixel sizes for
            dimensions Z, Y, and X.
        Notes
        -----
        We currently do not handle unit attachment to these values. Please see the file
        metadata for unit information.
        """
        return metadata_utils.physical_pixel_sizes(
            self.metadata, self.current_scene_index
        )