Source code for aicsimageio.readers.ome_tiff_reader

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import logging
import xml.etree.ElementTree as ET
from typing import Any, Dict, List, Optional, Tuple, Union
from urllib.error import URLError

import xarray as xr
from fsspec.implementations.local import LocalFileSystem
from fsspec.spec import AbstractFileSystem
from ome_types import OME, from_xml
from pydantic import ValidationError
from tifffile.tifffile import TiffFile, TiffFileError, TiffTags
from xmlschema import XMLSchemaValidationError
from xmlschema.exceptions import XMLSchemaValueError

from .. import constants, exceptions, transforms, types
from ..dimensions import (
    DEFAULT_CHUNK_DIMS,
    DEFAULT_DIMENSION_ORDER,
    DEFAULT_DIMENSION_ORDER_WITH_SAMPLES,
    DimensionNames,
)
from ..metadata import utils as metadata_utils
from ..types import PhysicalPixelSizes
from ..utils import io_utils
from .tiff_reader import TiffReader

###############################################################################

log = logging.getLogger(__name__)

###############################################################################


class OmeTiffReader(TiffReader):
    """
    Wraps the tifffile and ome-types APIs to provide the same aicsimageio Reader
    API but for volumetric OME-TIFF images.

    Parameters
    ----------
    image: types.PathLike
        Path to image file to construct Reader for.
    chunk_dims: Union[str, List[str]]
        Which dimensions to create chunks for. A string is split into one
        dimension per character.
        Default: DEFAULT_CHUNK_DIMS
        Note: Dimensions.SpatialY, Dimensions.SpatialX, and DimensionNames.Samples,
        will always be added to the list if not present during dask array
        construction.
    clean_metadata: bool
        Should the OME XML metadata found in the file be cleaned for known
        AICSImageIO 3.x and earlier created errors.
        Default: True (Clean the metadata for known errors)
    fs_kwargs: Optional[Dict[str, Any]]
        Any specific keyword arguments to pass down to the fsspec created filesystem.
        Default: None (treated as an empty dict)

    Notes
    -----
    If the OME metadata in your file isn't OME schema compliant or does not validate
    this will fail to read your file and raise an exception.

    If the OME metadata in your file doesn't use the latest OME schema (2016-06),
    this reader will make a request to the referenced remote OME schema to validate.
    """

    @staticmethod
    def _get_ome(ome_xml: str, clean_metadata: bool = True) -> OME:
        """
        Parse an OME XML string into an ome-types OME object.

        Parameters
        ----------
        ome_xml: str
            The raw OME XML text.
        clean_metadata: bool
            When True, the XML is first passed through
            ``metadata_utils.clean_ome_xml_for_known_issues``.

        Returns
        -------
        ome: OME
            The object produced by ``from_xml`` using the lxml parser.
        """
        # To clean or not to clean, that is the question
        if clean_metadata:
            ome_xml = metadata_utils.clean_ome_xml_for_known_issues(ome_xml)

        return from_xml(ome_xml, parser="lxml")

    @staticmethod
    def _is_supported_image(
        fs: AbstractFileSystem, path: str, clean_metadata: bool = True, **kwargs: Any
    ) -> bool:
        """
        Check whether the file at ``path`` can be read as a (main) OME-TIFF.

        Parameters
        ----------
        fs: AbstractFileSystem
            Filesystem used to open ``path``.
        path: str
            Path of the file on ``fs``.
        clean_metadata: bool
            Forwarded to ``_get_ome``.
        **kwargs: Any
            Accepted but not used by this implementation.

        Returns
        -------
        supported: bool
            True when the first page description parses as OME XML and the
            resulting metadata is not ``binary_only``. False in every other
            case; no exception escapes this method (failures are logged at
            debug level).
        """
        try:
            with fs.open(path) as open_resource:
                with TiffFile(open_resource) as tiff:
                    # Get first page description (aka the description tag in general)
                    # after Tifffile version 2023.3.15 mmstack images read all scenes
                    # into tiff.pages[0]
                    xml = tiff.pages[0].description
                    ome = OmeTiffReader._get_ome(xml, clean_metadata)

                    # Handle no images in metadata
                    # this commonly means it is a "BinaryData" OME file
                    # i.e. a non-main OME-TIFF from MicroManager or similar
                    # in this case, because it's not the main file we want to just
                    # roll back to TiffReader
                    if ome.binary_only:
                        return False

                    return True

        # tifffile exceptions
        except (TiffFileError, TypeError):
            return False

        # xml parse errors
        except ET.ParseError as e:
            log.debug(f"Failed to parse XML for the provided file. Error: {e}")
            return False

        # invalid OME XML
        except (XMLSchemaValueError, XMLSchemaValidationError, ValidationError) as e:
            log.debug(f"OME XML validation failed. Error: {e}")
            return False

        # can't connect to external schema resource (no internet connection)
        except URLError as e:
            log.debug(
                f"Could not validate OME XML against referenced schema "
                f"(no internet connection). "
                f"Error: {e}"
            )
            return False

        # Deliberate best-effort: this is a format sniffing routine, so any other
        # failure means "not supported" rather than a crash.
        except Exception as e:
            log.debug(f"Unhandled exception: {e}")
            return False

    @staticmethod
    def _guess_ome_dim_order(tiff: TiffFile, ome: OME, scene_index: int) -> List[str]:
        """
        Guess the dimension order based on OME metadata and actual TIFF data.

        Parameters
        ----------
        tiff: TiffFile
            A constructed TIFF object to retrieve data from.
        ome: OME
            A constructed OME object to retrieve data from.
        scene_index: int
            The current operating scene index to pull metadata from.

        Returns
        -------
        dims: List[str]
            Educated guess of the dimension order for the file
        """
        dims_from_ome = metadata_utils.get_dims_from_ome(ome, scene_index)

        # Assumes the dimensions coming from here are aligned semantically
        # with the dimensions specified in this package. Possible T dimension
        # is not equivalent to T dimension here. However, any dimensions
        # not also found in OME will be omitted.
        dims_from_tiff_axes = list(tiff.series[scene_index].axes)

        # Adjust the guess of what the dimensions are based on the combined
        # information from the tiff axes and the OME metadata.
        # Necessary since while OME metadata should be source of truth, it
        # does not provide enough data to guess which dimension is Samples
        # for RGB files
        # OME-only dims come first (in OME order), followed by the dims present
        # in both, in the order the TIFF axes report them.
        dims = [dim for dim in dims_from_ome if dim not in dims_from_tiff_axes]
        dims += [dim for dim in dims_from_tiff_axes if dim in dims_from_ome]
        return dims

    def __init__(
        self,
        image: types.PathLike,
        chunk_dims: Union[str, List[str]] = DEFAULT_CHUNK_DIMS,
        clean_metadata: bool = True,
        fs_kwargs: Optional[Dict[str, Any]] = None,
        **kwargs: Any,
    ):
        """
        Resolve the image path, validate it as an OME-TIFF, and cache its OME
        metadata and scene ids.

        Raises
        ------
        exceptions.UnsupportedFileFormatError
            ``_is_supported_image`` returned False for the provided image.
        """
        # A None sentinel avoids sharing one mutable dict between all calls
        if fs_kwargs is None:
            fs_kwargs = {}

        # Expand details of provided image
        self._fs, self._path = io_utils.pathlike_to_fs(
            image,
            enforce_exists=True,
            fs_kwargs=fs_kwargs,
        )

        # Store params
        if isinstance(chunk_dims, str):
            chunk_dims = list(chunk_dims)

        self.chunk_dims = chunk_dims
        self.clean_metadata = clean_metadata

        # Enforce valid image
        if not self._is_supported_image(self._fs, self._path, clean_metadata):
            raise exceptions.UnsupportedFileFormatError(
                self.__class__.__name__, self._path
            )

        # Get ome-types object and warn of other behaviors
        with self._fs.open(self._path) as open_resource:
            with TiffFile(open_resource) as tiff:
                # Get and store OME
                self._ome = self._get_ome(
                    tiff.pages[0].description, self.clean_metadata
                )

                # Get and store scenes
                self._scenes: Tuple[str, ...] = tuple(
                    image_meta.id for image_meta in self._ome.images
                )

                # Log a warning stating that if this is a MM OME-TIFF, don't read
                # many series
                if tiff.is_micromanager and not isinstance(self._fs, LocalFileSystem):
                    log.warning(
                        "**Remote reading** (S3, GCS, HTTPS, etc.) of multi-image "
                        "(or scene) OME-TIFFs created by MicroManager has limited "
                        "support with the scene API. "
                        "It is recommended to use independent AICSImage or Reader "
                        "objects for each remote file instead of the `set_scene` API. "
                        "Track progress on support here: "
                        "https://github.com/AllenCellModeling/aicsimageio/issues/196"
                    )

    @property
    def scenes(self) -> Tuple[str, ...]:
        """
        Returns
        -------
        scenes: Tuple[str, ...]
            The ids of the images found in the OME metadata at construction time.
        """
        return self._scenes

    @staticmethod
    def _expand_dims_to_match_ome(
        image_data: types.ArrayLike,
        ome: OME,
        dims: List[str],
        scene_index: int,
    ) -> types.ArrayLike:
        """
        Insert new axes into ``image_data`` for every dimension OME reports
        with a size of one.

        Parameters
        ----------
        image_data: types.ArrayLike
            The array to expand.
            NOTE(review): the indexing below assumes the array has no axis for
            dimensions whose OME size is 1 -- confirm against callers.
        ome: OME
            The OME metadata to read dimension sizes from.
        dims: List[str]
            Dimension names, one per axis of the desired result.
        scene_index: int
            Index of the image in ``ome.images`` to read sizes from.

        Returns
        -------
        image_data: types.ArrayLike
            ``image_data`` indexed with ``None`` for each size-one dimension
            and a full slice for every other dimension.
        """
        pixels = ome.images[scene_index].pixels
        channels = pixels.channels

        # need to correct channel count if this is a RGB image
        # Guard against metadata with no Channel elements: treat it as having
        # no multi-sample channels instead of failing on channels[0].
        n_samples = channels[0].samples_per_pixel if channels else None
        has_multiple_samples = n_samples is not None and n_samples > 1

        # Expand image_data for empty dimensions
        ome_shape = []
        for d in dims:
            # SizeC can represent RGB (Samples) data rather
            # than channel data, whether or not this is the case depends
            # on what the SamplesPerPixel are for the channel
            if d == "C" and has_multiple_samples:
                count = len(channels)
            elif d == "S" and has_multiple_samples:
                count = n_samples
            else:
                count = getattr(pixels, f"size_{d.lower()}")
            ome_shape.append(count)

        # The file may not have all the data but OME requires certain dimensions
        # expand to fill:
        #   None        -> add empty dimension where OME requires one but no data
        #                  exists
        #   slice(None) -> keep the full axis where data exists
        expand_dim_ops: List[Optional[slice]] = [
            None if d_size == 1 else slice(None, None, None) for d_size in ome_shape
        ]

        # Apply operators to dask array
        return image_data[tuple(expand_dim_ops)]

    def _general_data_array_constructor(
        self,
        image_data: types.ArrayLike,
        dims: List[str],
        coords: Dict[str, Union[List[Any], types.ArrayLike]],
        tiff_tags: TiffTags,
    ) -> xr.DataArray:
        """
        Build the xarray DataArray for the current scene from raw image data.

        Parameters
        ----------
        image_data: types.ArrayLike
            The image data as read from the TIFF series.
        dims: List[str]
            The guessed dimension order of the data after OME expansion.
        coords: Dict[str, Union[List[Any], types.ArrayLike]]
            Coordinates to attach to the DataArray.
        tiff_tags: TiffTags
            Stored under ``constants.METADATA_UNPROCESSED`` in the attrs.

        Returns
        -------
        image: xr.DataArray
            The data reordered to the default dimension order (with Samples
            when present in ``dims``), with the OME object stored under
            ``constants.METADATA_PROCESSED`` in the attrs.
        """
        # Expand the image data to match the OME empty dimensions
        image_data = self._expand_dims_to_match_ome(
            image_data=image_data,
            ome=self._ome,
            dims=dims,
            scene_index=self.current_scene_index,
        )

        # Always order array
        if DimensionNames.Samples in dims:
            out_order = DEFAULT_DIMENSION_ORDER_WITH_SAMPLES
        else:
            out_order = DEFAULT_DIMENSION_ORDER

        # Transform into order
        image_data = transforms.reshape_data(
            image_data,
            "".join(dims),
            out_order,
        )

        # Reset dims after transform
        dims = list(out_order)

        return xr.DataArray(
            image_data,
            dims=dims,
            coords=coords,
            attrs={
                constants.METADATA_UNPROCESSED: tiff_tags,
                constants.METADATA_PROCESSED: self._ome,
            },
        )

    def _read_delayed(self) -> xr.DataArray:
        """
        Construct the delayed xarray DataArray object for the image.

        Returns
        -------
        image: xr.DataArray
            The fully constructed and fully delayed image as a DataArray object.
            Metadata is attached in some cases as coords, dims, and attrs contains
            unprocessed tags and processed OME object.

        Raises
        ------
        exceptions.UnsupportedFileFormatError
            The file could not be read or is not supported.
        """
        with self._fs.open(self._path) as open_resource:
            with TiffFile(open_resource) as tiff:
                # Get unprocessed metadata from tags
                tiff_tags = self._get_tiff_tags(tiff)

                # Unpack coords from OME
                coords = metadata_utils.get_coords_from_ome(
                    ome=self._ome,
                    scene_index=self.current_scene_index,
                )

                # Guess the dim order based on metadata and actual tiff data
                dims = OmeTiffReader._guess_ome_dim_order(
                    tiff, self._ome, self.current_scene_index
                )

                # Grab the tifffile axes to use for dask array construction
                # If any of the non-"standard" dims are present
                # they will be filtered out during later reshape data calls
                strictly_read_dims = list(tiff.series[self.current_scene_index].axes)

                # Create the delayed dask array
                image_data = self._create_dask_array(tiff, strictly_read_dims)

                return self._general_data_array_constructor(
                    image_data,
                    dims,
                    coords,
                    tiff_tags,
                )

    def _read_immediate(self) -> xr.DataArray:
        """
        Construct the in-memory xarray DataArray object for the image.

        Returns
        -------
        image: xr.DataArray
            The fully constructed and fully read into memory image as a DataArray
            object. Metadata is attached in some cases as coords, dims, and attrs
            contains unprocessed tags and processed OME object.

        Raises
        ------
        exceptions.UnsupportedFileFormatError
            The file could not be read or is not supported.
        """
        with self._fs.open(self._path) as open_resource:
            with TiffFile(open_resource) as tiff:
                # Get unprocessed metadata from tags
                tiff_tags = self._get_tiff_tags(tiff)

                # Unpack coords from OME
                coords = metadata_utils.get_coords_from_ome(
                    ome=self._ome,
                    scene_index=self.current_scene_index,
                )

                # Guess the dim order based on metadata and actual tiff data
                dims = OmeTiffReader._guess_ome_dim_order(
                    tiff, self._ome, self.current_scene_index
                )

                # Read image into memory
                image_data = tiff.series[self.current_scene_index].asarray()

                return self._general_data_array_constructor(
                    image_data,
                    dims,
                    coords,
                    tiff_tags,
                )

    @property
    def ome_metadata(self) -> OME:
        """
        Returns
        -------
        ome: OME
            The value of ``self.metadata``.
            NOTE(review): ``metadata`` is not defined in this class; presumably
            it is provided by a base class -- confirm.
        """
        return self.metadata

    @property
    def physical_pixel_sizes(self) -> PhysicalPixelSizes:
        """
        Returns
        -------
        sizes: PhysicalPixelSizes
            Using available metadata, the floats representing physical pixel sizes for
            dimensions Z, Y, and X.

        Notes
        -----
        We currently do not handle unit attachment to these values. Please see the file
        metadata for unit information.
        """
        return metadata_utils.physical_pixel_sizes(
            self.metadata, self.current_scene_index
        )